src/os/windows/string_uniscribe.cpp

   1 /*
   2  * This file is part of OpenTTD.
   3  * OpenTTD is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation, version 2.
   4  * OpenTTD is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
   5  * See the GNU General Public License for more details. You should have received a copy of the GNU General Public License along with OpenTTD. If not, see <http://www.gnu.org/licenses/>.
   6  */
   7
   8 /** @file string_uniscribe.cpp Functions related to laying out text on Win32. */
   9
  10 #include "../../stdafx.h"
  11 #include "../../debug.h"
  12 #include "string_uniscribe.h"
  13 #include "../../language.h"
  14 #include "../../strings_func.h"
  15 #include "../../string_func.h"
  16 #include "../../table/control_codes.h"
  17 #include "../../zoom_func.h"
  18 #include "win32.h"
  19
  20 #include <windows.h>
  21 #include <usp10.h>
  22
  23 #include "../../safeguards.h"
  24
  25 #ifdef _MSC_VER
  26 #       pragma comment(lib, "usp10")
  27 #endif
  28
  29
  30 /** Uniscribe cache for internal font information, cleared when OTTD changes fonts. */
  31 static SCRIPT_CACHE _script_cache[FS_END];
  32
  33 /**
  34  * Contains all information about a run of characters. A run are consecutive
  35  * characters that share a single font and language.
  36  */
  37 struct UniscribeRun {
  38         int pos;
  39         int len;
  40         Font *font;
  41
  42         std::vector<GlyphID> ft_glyphs;
  43
  44         SCRIPT_ANALYSIS sa;
  45         std::vector<WORD> char_to_glyph;
  46
  47         std::vector<SCRIPT_VISATTR> vis_attribs;
  48         std::vector<WORD> glyphs;
  49         std::vector<int> advances;
  50         std::vector<GOFFSET> offsets;
  51         int total_advance;
  52
  53         UniscribeRun(int pos, int len, Font *font, SCRIPT_ANALYSIS &sa) : pos(pos), len(len), font(font), sa(sa) {}
  54 };
  55
  56 /** Break a string into language formatting ranges. */
  57 static std::vector<SCRIPT_ITEM> UniscribeItemizeString(UniscribeParagraphLayoutFactory::CharType *buff, int32_t length);
  58 /** Generate and place glyphs for a run of characters. */
  59 static bool UniscribeShapeRun(const UniscribeParagraphLayoutFactory::CharType *buff, UniscribeRun &range);
  60
  61 /**
  62  * Wrapper for doing layouts with Uniscribe.
  63  */
  64 class UniscribeParagraphLayout : public ParagraphLayouter {
  65 private:
  66         const UniscribeParagraphLayoutFactory::CharType *text_buffer;
  67
  68         std::vector<UniscribeRun> ranges; ///< All runs of the text.
  69         std::vector<UniscribeRun>::iterator cur_range; ///< The next run to be output.
  70         int cur_range_offset = 0; ///< Offset from the start of the current run from where to output.
  71
  72 public:
  73         /** Visual run contains data about the bit of text with the same font. */
  74         class UniscribeVisualRun : public ParagraphLayouter::VisualRun {
  75         private:
  76                 std::vector<GlyphID> glyphs;
  77                 std::vector<Point> positions;
  78                 std::vector<WORD> char_to_glyph;
  79
  80                 int start_pos;
  81                 int total_advance;
  82                 int num_glyphs;
  83                 Font *font;
  84
  85                 mutable std::vector<int> glyph_to_char;
  86
  87         public:
  88                 UniscribeVisualRun(const UniscribeRun &range, int x);
  89                 UniscribeVisualRun(UniscribeVisualRun &&other) noexcept;
  90
  91                 const std::vector<GlyphID> &GetGlyphs() const override { return this->glyphs; }
  92                 const std::vector<Point> &GetPositions() const override { return this->positions; }
  93                 const std::vector<int> &GetGlyphToCharMap() const override;
  94
  95                 const Font *GetFont() const override { return this->font;  }
  96                 int GetLeading() const override { return this->font->fc->GetHeight(); }
  97                 int GetGlyphCount() const override { return this->num_glyphs; }
  98                 int GetAdvance() const { return this->total_advance; }
  99         };
 100
 101         /** A single line worth of VisualRuns. */
 102         class UniscribeLine : public std::vector<UniscribeVisualRun>, public ParagraphLayouter::Line {
 103         public:
 104                 int GetLeading() const override;
 105                 int GetWidth() const override;
 106                 int CountRuns() const override { return (uint)this->size();  }
 107                 const VisualRun &GetVisualRun(int run) const override { return this->at(run);  }
 108
 109                 int GetInternalCharLength(char32_t c) const override
 110                 {
 111                         /* Uniscribe uses UTF-16 internally which means we need to account for surrogate pairs. */
 112                         return c >= 0x010000U ? 2 : 1;
 113                 }
 114         };
 115
 116         UniscribeParagraphLayout(std::vector<UniscribeRun> &ranges, const UniscribeParagraphLayoutFactory::CharType *buffer) : text_buffer(buffer), ranges(ranges)
 117         {
 118                 this->Reflow();
 119         }
 120
 121         ~UniscribeParagraphLayout() override {}
 122
 123         void Reflow() override
 124         {
 125                 this->cur_range = this->ranges.begin();
 126                 this->cur_range_offset = 0;
 127         }
 128
 129         std::unique_ptr<const Line> NextLine(int max_width) override;
 130 };
 131
 132 void UniscribeResetScriptCache(FontSize size)
 133 {
 134         if (_script_cache[size] != nullptr) {
 135                 ScriptFreeCache(&_script_cache[size]);
 136                 _script_cache[size] = nullptr;
 137         }
 138 }
 139
 140 /** Load the matching native Windows font. */
 141 static HFONT HFontFromFont(Font *font)
 142 {
 143         if (font->fc->GetOSHandle() != nullptr) return CreateFontIndirect(reinterpret_cast<PLOGFONT>(const_cast<void *>(font->fc->GetOSHandle())));
 144
 145         LOGFONT logfont;
 146         ZeroMemory(&logfont, sizeof(LOGFONT));
 147         logfont.lfHeight = font->fc->GetHeight();
 148         logfont.lfWeight = FW_NORMAL;
 149         logfont.lfCharSet = DEFAULT_CHARSET;
 150         convert_to_fs(font->fc->GetFontName(), logfont.lfFaceName, lengthof(logfont.lfFaceName));
 151
 152         return CreateFontIndirect(&logfont);
 153 }
 154
 155 /** Determine the glyph positions for a run. */
 156 static bool UniscribeShapeRun(const UniscribeParagraphLayoutFactory::CharType *buff, UniscribeRun &range)
 157 {
 158         /* Initial size guess for the number of glyphs recommended by Uniscribe. */
 159         range.glyphs.resize(range.len * 3 / 2 + 16);
 160         range.vis_attribs.resize(range.glyphs.size());
 161
 162         /* The char-to-glyph array is the same size as the input. */
 163         range.char_to_glyph.resize(range.len);
 164
 165         HDC temp_dc = nullptr;
 166         HFONT old_font = nullptr;
 167         HFONT cur_font = nullptr;
 168
 169         while (true) {
 170                 /* Shape the text run by determining the glyphs needed for display. */
 171                 int glyphs_used = 0;
 172                 HRESULT hr = ScriptShape(temp_dc, &_script_cache[range.font->fc->GetSize()], buff + range.pos, range.len, (int)range.glyphs.size(), &range.sa, &range.glyphs[0], &range.char_to_glyph[0], &range.vis_attribs[0], &glyphs_used);
 173
 174                 if (SUCCEEDED(hr)) {
 175                         range.glyphs.resize(glyphs_used);
 176                         range.vis_attribs.resize(glyphs_used);
 177
 178                         /* Calculate the glyph positions. */
 179                         ABC abc;
 180                         range.advances.resize(range.glyphs.size());
 181                         range.offsets.resize(range.glyphs.size());
 182                         hr = ScriptPlace(temp_dc, &_script_cache[range.font->fc->GetSize()], &range.glyphs[0], (int)range.glyphs.size(), &range.vis_attribs[0], &range.sa, &range.advances[0], &range.offsets[0], &abc);
 183                         if (SUCCEEDED(hr)) {
 184                                 /* We map our special sprite chars to values that don't fit into a WORD. Copy the glyphs
 185                                  * into a new vector and query the real glyph to use for these special chars. */
 186                                 range.ft_glyphs.resize(range.glyphs.size());
 187                                 for (size_t g_id = 0; g_id < range.glyphs.size(); g_id++) {
 188                                         range.ft_glyphs[g_id] = range.glyphs[g_id];
 189                                 }
 190                                 for (int i = 0; i < range.len; i++) {
 191                                         if (buff[range.pos + i] >= SCC_SPRITE_START && buff[range.pos + i] <= SCC_SPRITE_END) {
 192                                                 auto pos = range.char_to_glyph[i];
 193                                                 if (range.ft_glyphs[pos] == 0) { // Font doesn't have our special glyph, so remap.
 194                                                         range.ft_glyphs[pos] = range.font->fc->MapCharToGlyph(buff[range.pos + i]);
 195                                                         range.offsets[pos].dv = (range.font->fc->GetHeight() - ScaleSpriteTrad(FontCache::GetDefaultFontHeight(range.font->fc->GetSize()))) / 2; // Align sprite font to centre
 196                                                         range.advances[pos] = range.font->fc->GetGlyphWidth(range.ft_glyphs[pos]);
 197                                                 }
 198                                         }
 199                                 }
 200
 201                                 range.total_advance = 0;
 202                                 for (size_t i = 0; i < range.advances.size(); i++) {
 203 #ifdef WITH_FREETYPE
 204                                         /* FreeType and GDI/Uniscribe seems to occasionally disagree over the width of a glyph. */
 205                                         if (range.advances[i] > 0 && range.ft_glyphs[i] != 0xFFFF) range.advances[i] = range.font->fc->GetGlyphWidth(range.ft_glyphs[i]);
 206 #endif
 207                                         range.total_advance += range.advances[i];
 208                                 }
 209                                 break;
 210                         }
 211                 }
 212
 213                 if (hr == E_OUTOFMEMORY) {
 214                         /* The glyph buffer needs to be larger. Just double it every time. */
 215                         range.glyphs.resize(range.glyphs.size() * 2);
 216                         range.vis_attribs.resize(range.vis_attribs.size() * 2);
 217                 } else if (hr == E_PENDING) {
 218                         /* Glyph data is not in cache, load native font. */
 219                         cur_font = HFontFromFont(range.font);
 220                         if (cur_font == nullptr) return false; // Sorry, no dice.
 221
 222                         temp_dc = CreateCompatibleDC(nullptr);
 223                         SetMapMode(temp_dc, MM_TEXT);
 224                         old_font = (HFONT)SelectObject(temp_dc, cur_font);
 225                 } else if (hr == USP_E_SCRIPT_NOT_IN_FONT && range.sa.eScript != SCRIPT_UNDEFINED) {
 226                         /* Try again with the generic shaping engine. */
 227                         range.sa.eScript = SCRIPT_UNDEFINED;
 228                 } else {
 229                         /* Some unknown other error. */
 230                         if (temp_dc != nullptr) {
 231                                 SelectObject(temp_dc, old_font);
 232                                 DeleteObject(cur_font);
 233                                 ReleaseDC(nullptr, temp_dc);
 234                         }
 235                         return false;
 236                 }
 237         }
 238
 239         if (temp_dc != nullptr) {
 240                 SelectObject(temp_dc, old_font);
 241                 DeleteObject(cur_font);
 242                 ReleaseDC(nullptr, temp_dc);
 243         }
 244
 245         return true;
 246 }
 247
 248 static std::vector<SCRIPT_ITEM> UniscribeItemizeString(UniscribeParagraphLayoutFactory::CharType *buff, int32_t length)
 249 {
 250         /* Itemize text. */
 251         SCRIPT_CONTROL control;
 252         ZeroMemory(&control, sizeof(SCRIPT_CONTROL));
 253         control.uDefaultLanguage = _current_language->winlangid;
 254
 255         SCRIPT_STATE state;
 256         ZeroMemory(&state, sizeof(SCRIPT_STATE));
 257         state.uBidiLevel = _current_text_dir == TD_RTL ? 1 : 0;
 258
 259         std::vector<SCRIPT_ITEM> items(16);
 260         while (true) {
 261                 /* We subtract one from max_items to work around a buffer overflow on some older versions of Windows. */
 262                 int generated = 0;
 263                 HRESULT hr = ScriptItemize(buff, length, (int)items.size() - 1, &control, &state, &items[0], &generated);
 264
 265                 if (SUCCEEDED(hr)) {
 266                         /* Resize the item buffer. Note that Uniscribe will always add an additional end sentinel item. */
 267                         items.resize(generated + 1);
 268                         break;
 269                 }
 270                 /* Some kind of error except item buffer too small. */
 271                 if (hr != E_OUTOFMEMORY) return std::vector<SCRIPT_ITEM>();
 272
 273                 items.resize(items.size() * 2);
 274         }
 275
 276         return items;
 277 }
 278
 279 /* static */ ParagraphLayouter *UniscribeParagraphLayoutFactory::GetParagraphLayout(CharType *buff, CharType *buff_end, FontMap &fontMapping)
 280 {
 281         int32_t length = buff_end - buff;
 282         /* Can't layout an empty string. */
 283         if (length == 0) return nullptr;
 284
 285         /* Can't layout our in-built sprite fonts. */
 286         for (auto const &pair : fontMapping) {
 287                 if (pair.second->fc->IsBuiltInFont()) return nullptr;
 288         }
 289
 290         /* Itemize text. */
 291         std::vector<SCRIPT_ITEM> items = UniscribeItemizeString(buff, length);
 292         if (items.empty()) return nullptr;
 293
 294         /* Build ranges from the items and the font map. A range is a run of text
 295          * that is part of a single item and formatted using a single font style. */
 296         std::vector<UniscribeRun> ranges;
 297
 298         int cur_pos = 0;
 299         std::vector<SCRIPT_ITEM>::iterator cur_item = items.begin();
 300         for (auto const &i : fontMapping) {
 301                 while (cur_pos < i.first && cur_item != items.end() - 1) {
 302                         /* Add a range that spans the intersection of the remaining item and font run. */
 303                         int stop_pos = std::min(i.first, (cur_item + 1)->iCharPos);
 304                         assert(stop_pos - cur_pos > 0);
 305                         ranges.push_back(UniscribeRun(cur_pos, stop_pos - cur_pos, i.second, cur_item->a));
 306
 307                         /* Shape the range. */
 308                         if (!UniscribeShapeRun(buff, ranges.back())) {
 309                                 return nullptr;
 310                         }
 311
 312                         /* If we are at the end of the current item, advance to the next item. */
 313                         if (stop_pos == (cur_item + 1)->iCharPos) cur_item++;
 314                         cur_pos = stop_pos;
 315                 }
 316         }
 317
 318         return new UniscribeParagraphLayout(ranges, buff);
 319 }
 320
 321 /* virtual */ std::unique_ptr<const ParagraphLayouter::Line> UniscribeParagraphLayout::NextLine(int max_width)
 322 {
 323         std::vector<UniscribeRun>::iterator start_run = this->cur_range;
 324         std::vector<UniscribeRun>::iterator last_run = this->cur_range;
 325
 326         if (start_run == this->ranges.end()) return nullptr;
 327
 328         /* Add remaining width of the first run if it is a broken run. */
 329         int cur_width = 0;
 330         if (this->cur_range_offset != 0) {
 331                 std::vector<int> dx(start_run->len);
 332                 ScriptGetLogicalWidths(&start_run->sa, start_run->len, (int)start_run->glyphs.size(), &start_run->advances[0], &start_run->char_to_glyph[0], &start_run->vis_attribs[0], &dx[0]);
 333
 334                 for (std::vector<int>::const_iterator c = dx.begin() + this->cur_range_offset; c != dx.end(); c++) {
 335                         cur_width += *c;
 336                 }
 337                 ++last_run;
 338         }
 339
 340         /* Gather runs until the line is full. */
 341         while (last_run != this->ranges.end() && cur_width <= max_width) {
 342                 cur_width += last_run->total_advance;
 343                 ++last_run;
 344         }
 345
 346         /* If the text does not fit into the available width, find a suitable breaking point. */
 347         int remaining_offset = (last_run - 1)->len + 1;
 348         int whitespace_count = 0;
 349         if (cur_width > max_width) {
 350                 std::vector<SCRIPT_LOGATTR> log_attribs;
 351
 352                 /* Get word break information. */
 353                 int width_avail = max_width;
 354                 int num_chars = this->cur_range_offset;
 355                 int start_offs = this->cur_range_offset;
 356                 int last_cluster = this->cur_range_offset + 1;
 357                 for (std::vector<UniscribeRun>::iterator r = start_run; r != last_run; r++) {
 358                         log_attribs.resize(r->pos - start_run->pos + r->len);
 359                         if (FAILED(ScriptBreak(this->text_buffer + r->pos + start_offs, r->len - start_offs, &r->sa, &log_attribs[r->pos - start_run->pos + start_offs]))) return nullptr;
 360
 361                         std::vector<int> dx(r->len);
 362                         ScriptGetLogicalWidths(&r->sa, r->len, (int)r->glyphs.size(), &r->advances[0], &r->char_to_glyph[0], &r->vis_attribs[0], &dx[0]);
 363
 364                         /* Count absolute max character count on the line. */
 365                         for (int c = start_offs; c < r->len && width_avail > 0; c++, num_chars++) {
 366                                 if (c > start_offs && log_attribs[num_chars].fCharStop) last_cluster = num_chars;
 367                                 width_avail -= dx[c];
 368                         }
 369
 370                         start_offs = 0;
 371                 }
 372
 373                 /* Walk backwards to find the last suitable breaking point. */
 374                 while (--num_chars > this->cur_range_offset && !log_attribs[num_chars].fSoftBreak && !log_attribs[num_chars].fWhiteSpace) {}
 375
 376                 if (num_chars == this->cur_range_offset) {
 377                         /* Didn't find any suitable word break point, just break on the last cluster boundary. */
 378                         num_chars = last_cluster;
 379                 }
 380
 381                 /* Eat any whitespace characters before the breaking point. */
 382                 while (num_chars - 1 > this->cur_range_offset && log_attribs[num_chars - 1].fWhiteSpace) num_chars--;
 383                 /* Count whitespace after the breaking point. */
 384                 while (num_chars + whitespace_count < (int)log_attribs.size() && log_attribs[num_chars + whitespace_count].fWhiteSpace) whitespace_count++;
 385
 386                 /* Get last run that corresponds to the number of characters to show. */
 387                 for (std::vector<UniscribeRun>::iterator run = start_run; run != last_run; run++) {
 388                         num_chars -= run->len;
 389
 390                         if (num_chars <= 0) {
 391                                 remaining_offset = num_chars + run->len + 1;
 392                                 last_run = run + 1;
 393                                 assert(remaining_offset - 1 > 0);
 394                                 break;
 395                         }
 396                 }
 397         }
 398
 399         /* Build display order from the runs. */
 400         std::vector<BYTE> bidi_level;
 401         for (std::vector<UniscribeRun>::iterator r = start_run; r != last_run; r++) {
 402                 bidi_level.push_back(r->sa.s.uBidiLevel);
 403         }
 404         std::vector<INT> vis_to_log(bidi_level.size());
 405         if (FAILED(ScriptLayout((int)bidi_level.size(), &bidi_level[0], &vis_to_log[0], nullptr))) return nullptr;
 406
 407         /* Create line. */
 408         std::unique_ptr<UniscribeLine> line(new UniscribeLine());
 409
 410         int cur_pos = 0;
 411         for (std::vector<INT>::iterator l = vis_to_log.begin(); l != vis_to_log.end(); l++) {
 412                 std::vector<UniscribeRun>::iterator i_run = start_run + *l;
 413                 UniscribeRun run = *i_run;
 414
 415                 /* Partial run after line break (either start or end)? Reshape run to get the first/last glyphs right. */
 416                 if (i_run == last_run - 1 && remaining_offset <= (last_run - 1)->len) {
 417                         run.len = remaining_offset - 1;
 418
 419                         if (!UniscribeShapeRun(this->text_buffer, run)) return nullptr;
 420                 }
 421                 if (i_run == start_run && this->cur_range_offset > 0) {
 422                         assert(run.len - this->cur_range_offset > 0);
 423                         run.pos += this->cur_range_offset;
 424                         run.len -= this->cur_range_offset;
 425
 426                         if (!UniscribeShapeRun(this->text_buffer, run)) return nullptr;
 427                 }
 428
 429                 line->emplace_back(run, cur_pos);
 430                 cur_pos += run.total_advance;
 431         }
 432
 433         if (remaining_offset + whitespace_count - 1 < (last_run - 1)->len) {
 434                 /* We didn't use up all of the last run, store remainder for the next line. */
 435                 this->cur_range_offset = remaining_offset + whitespace_count - 1;
 436                 this->cur_range = last_run - 1;
 437                 assert(this->cur_range->len > this->cur_range_offset);
 438         } else {
 439                 this->cur_range_offset = 0;
 440                 this->cur_range = last_run;
 441         }
 442
 443         return line;
 444 }
 445
 446 /**
 447  * Get the height of the line.
 448  * @return The maximum height of the line.
 449  */
 450 int UniscribeParagraphLayout::UniscribeLine::GetLeading() const
 451 {
 452         int leading = 0;
 453         for (const auto &run : *this) {
 454                 leading = std::max(leading, run.GetLeading());
 455         }
 456
 457         return leading;
 458 }
 459
 460 /**
 461  * Get the width of this line.
 462  * @return The width of the line.
 463  */
 464 int UniscribeParagraphLayout::UniscribeLine::GetWidth() const
 465 {
 466         int length = 0;
 467         for (const auto &run : *this) {
 468                 length += run.GetAdvance();
 469         }
 470
 471         return length;
 472 }
 473
 474 UniscribeParagraphLayout::UniscribeVisualRun::UniscribeVisualRun(const UniscribeRun &range, int x) : glyphs(range.ft_glyphs), char_to_glyph(range.char_to_glyph), start_pos(range.pos), total_advance(range.total_advance), font(range.font)
 475 {
 476         this->num_glyphs = (int)glyphs.size();
 477         this->positions.reserve(this->num_glyphs + 1);
 478
 479         int advance = x;
 480         for (int i = 0; i < this->num_glyphs; i++) {
 481                 this->positions.emplace_back(range.offsets[i].du + advance, range.offsets[i].dv);
 482
 483                 advance += range.advances[i];
 484         }
 485         /* End-of-run position. */
 486         this->positions.emplace_back(advance, 0);
 487 }
 488
 489 UniscribeParagraphLayout::UniscribeVisualRun::UniscribeVisualRun(UniscribeVisualRun&& other) noexcept
 490                                                                 : glyphs(std::move(other.glyphs)), positions(std::move(other.positions)), char_to_glyph(std::move(other.char_to_glyph)),
 491                                                                   start_pos(other.start_pos), total_advance(other.total_advance), num_glyphs(other.num_glyphs), font(other.font),
 492                                                                   glyph_to_char(std::move(other.glyph_to_char))
 493 {
 494 }
 495
 496 const std::vector<int> &UniscribeParagraphLayout::UniscribeVisualRun::GetGlyphToCharMap() const
 497 {
 498         if (this->glyph_to_char.empty()) {
 499                 this->glyph_to_char.resize(this->GetGlyphCount());
 500
 501                 /* The char to glyph array contains the first glyph index of the cluster that is associated
 502                  * with each character. It is possible for a cluster to be formed of several chars. */
 503                 for (int c = 0; c < (int)this->char_to_glyph.size(); c++) {
 504                         /* If multiple chars map to one glyph, only refer back to the first character. */
 505                         if (this->glyph_to_char[this->char_to_glyph[c]] == 0) this->glyph_to_char[this->char_to_glyph[c]] = c + this->start_pos;
 506                 }
 507
 508                 /* We only marked the first glyph of each cluster in the loop above. Fill the gaps. */
 509                 int last_char = this->glyph_to_char[0];
 510                 for (int g = 0; g < this->GetGlyphCount(); g++) {
 511                         if (this->glyph_to_char[g] != 0) last_char = this->glyph_to_char[g];
 512                         this->glyph_to_char[g] = last_char;
 513                 }
 514         }
 515
 516         return this->glyph_to_char;
 517 }
 518
 519
 520 /* virtual */ void UniscribeStringIterator::SetString(const char *s)
 521 {
 522         const char *string_base = s;
 523
 524         this->utf16_to_utf8.clear();
 525         this->str_info.clear();
 526         this->cur_pos = 0;
 527
 528         /* Uniscribe operates on UTF-16, thus we have to convert the input string.
 529          * To be able to return proper offsets, we have to create a mapping at the same time. */
 530         std::vector<wchar_t> utf16_str;     ///< UTF-16 copy of the string.
 531         while (*s != '\0') {
 532                 size_t idx = s - string_base;
 533
 534                 char32_t c = Utf8Consume(&s);
 535                 if (c < 0x10000) {
 536                         utf16_str.push_back((wchar_t)c);
 537                 } else {
 538                         /* Make a surrogate pair. */
 539                         utf16_str.push_back((wchar_t)(0xD800 + ((c - 0x10000) >> 10)));
 540                         utf16_str.push_back((wchar_t)(0xDC00 + ((c - 0x10000) & 0x3FF)));
 541                         this->utf16_to_utf8.push_back(idx);
 542                 }
 543                 this->utf16_to_utf8.push_back(idx);
 544         }
 545         this->utf16_to_utf8.push_back(s - string_base);
 546
 547         /* Query Uniscribe for word and cluster break information. */
 548         this->str_info.resize(utf16_to_utf8.size());
 549
 550         if (!utf16_str.empty()) {
 551                 /* Itemize string into language runs. */
 552                 std::vector<SCRIPT_ITEM> runs = UniscribeItemizeString(&utf16_str[0], (int32_t)utf16_str.size());
 553
 554                 for (std::vector<SCRIPT_ITEM>::const_iterator run = runs.begin(); !runs.empty() && run != runs.end() - 1; run++) {
 555                         /* Get information on valid word and character break.s */
 556                         int len = (run + 1)->iCharPos - run->iCharPos;
 557                         std::vector<SCRIPT_LOGATTR> attr(len);
 558                         ScriptBreak(&utf16_str[run->iCharPos], len, &run->a, &attr[0]);
 559
 560                         /* Extract the information we're interested in. */
 561                         for (size_t c = 0; c < attr.size(); c++) {
 562                                 /* First character of a run is always a valid word break. */
 563                                 this->str_info[c + run->iCharPos].word_stop = attr[c].fWordStop || c == 0;
 564                                 this->str_info[c + run->iCharPos].char_stop = attr[c].fCharStop;
 565                         }
 566                 }
 567         }
 568
 569         /* End-of-string is always a valid stopping point. */
 570         this->str_info.back().char_stop = true;
 571         this->str_info.back().word_stop = true;
 572 }
 573
 574 /* virtual */ size_t UniscribeStringIterator::SetCurPosition(size_t pos)
 575 {
 576         /* Convert incoming position to an UTF-16 string index. */
 577         size_t utf16_pos = 0;
 578         for (size_t i = 0; i < this->utf16_to_utf8.size(); i++) {
 579                 if (this->utf16_to_utf8[i] == pos) {
 580                         utf16_pos = i;
 581                         break;
 582                 }
 583         }
 584
 585         /* Sanitize in case we get a position inside a grapheme cluster. */
 586         while (utf16_pos > 0 && !this->str_info[utf16_pos].char_stop) utf16_pos--;
 587         this->cur_pos = utf16_pos;
 588
 589         return this->utf16_to_utf8[this->cur_pos];
 590 }
 591
 592 /* virtual */ size_t UniscribeStringIterator::Next(IterType what)
 593 {
 594         assert(this->cur_pos <= this->utf16_to_utf8.size());
 595         assert(what == StringIterator::ITER_CHARACTER || what == StringIterator::ITER_WORD);
 596
 597         if (this->cur_pos == this->utf16_to_utf8.size()) return END;
 598
 599         do {
 600                 this->cur_pos++;
 601         } while (this->cur_pos < this->utf16_to_utf8.size() && (what  == ITER_WORD ? !this->str_info[this->cur_pos].word_stop : !this->str_info[this->cur_pos].char_stop));
 602
 603         return this->cur_pos == this->utf16_to_utf8.size() ? END : this->utf16_to_utf8[this->cur_pos];
 604 }
 605
 606 /*virtual */ size_t UniscribeStringIterator::Prev(IterType what)
 607 {
 608         assert(this->cur_pos <= this->utf16_to_utf8.size());
 609         assert(what == StringIterator::ITER_CHARACTER || what == StringIterator::ITER_WORD);
 610
 611         if (this->cur_pos == 0) return END;
 612
 613         do {
 614                 this->cur_pos--;
 615         } while (this->cur_pos > 0 && (what == ITER_WORD ? !this->str_info[this->cur_pos].word_stop : !this->str_info[this->cur_pos].char_stop));
 616
 617         return this->utf16_to_utf8[this->cur_pos];
 618 }