bump product version to 6.4.0.3
[LibreOffice.git] / vcl / source / font / fontcharmap.cxx
blob9c8b54682041a9992c0307b75f1f0b47a53ca7c0
1 /*
2 * This file is part of the LibreOffice project.
4 * This Source Code Form is subject to the terms of the Mozilla Public
5 * License, v. 2.0. If a copy of the MPL was not distributed with this
6 * file, You can obtain one at http://mozilla.org/MPL/2.0/.
8 * This file incorporates work covered by the following license notice:
10 * Licensed to the Apache Software Foundation (ASF) under one or more
11 * contributor license agreements. See the NOTICE file distributed
12 * with this work for additional information regarding copyright
13 * ownership. The ASF licenses this file to you under the Apache
14 * License, Version 2.0 (the "License"); you may not use this file
15 * except in compliance with the License. You may obtain a copy of
16 * the License at http://www.apache.org/licenses/LICENSE-2.0 .
18 #include <vcl/fontcharmap.hxx>
19 #include <impfontcharmap.hxx>
20 #include <rtl/textcvt.h>
21 #include <rtl/textenc.h>
22 #include <sal/log.hxx>
24 #include <vector>
25 #include <set>
27 CmapResult::CmapResult( bool bSymbolic,
28 const sal_UCS4* pRangeCodes, int nRangeCount )
29 : mpRangeCodes( pRangeCodes)
30 , mpStartGlyphs( nullptr)
31 , mpGlyphIds( nullptr)
32 , mnRangeCount( nRangeCount)
33 , mbSymbolic( bSymbolic)
34 , mbRecoded( false)
37 static ImplFontCharMapRef g_pDefaultImplFontCharMap;
38 static const sal_UCS4 aDefaultUnicodeRanges[] = {0x0020,0xD800, 0xE000,0xFFF0};
39 static const sal_UCS4 aDefaultSymbolRanges[] = {0x0020,0x0100, 0xF020,0xF100};
41 ImplFontCharMap::~ImplFontCharMap()
43 if( !isDefaultMap() )
45 delete[] mpRangeCodes;
46 delete[] mpStartGlyphs;
47 delete[] mpGlyphIds;
51 ImplFontCharMap::ImplFontCharMap( const CmapResult& rCR )
52 : mpRangeCodes( rCR.mpRangeCodes )
53 , mpStartGlyphs( rCR.mpStartGlyphs )
54 , mpGlyphIds( rCR.mpGlyphIds )
55 , mnRangeCount( rCR.mnRangeCount )
56 , mnCharCount( 0 )
58 const sal_UCS4* pRangePtr = mpRangeCodes;
59 for( int i = mnRangeCount; --i >= 0; pRangePtr += 2 )
61 sal_UCS4 cFirst = pRangePtr[0];
62 sal_UCS4 cLast = pRangePtr[1];
63 mnCharCount += cLast - cFirst;
67 ImplFontCharMapRef const & ImplFontCharMap::getDefaultMap( bool bSymbols )
69 const sal_UCS4* pRangeCodes = aDefaultUnicodeRanges;
70 int nCodesCount = SAL_N_ELEMENTS(aDefaultUnicodeRanges);
71 if( bSymbols )
73 pRangeCodes = aDefaultSymbolRanges;
74 nCodesCount = SAL_N_ELEMENTS(aDefaultSymbolRanges);
77 CmapResult aDefaultCR( bSymbols, pRangeCodes, nCodesCount/2 );
78 g_pDefaultImplFontCharMap = ImplFontCharMapRef(new ImplFontCharMap(aDefaultCR));
80 return g_pDefaultImplFontCharMap;
83 bool ImplFontCharMap::isDefaultMap() const
85 const bool bIsDefault = (mpRangeCodes == aDefaultUnicodeRanges) || (mpRangeCodes == aDefaultSymbolRanges);
86 return bIsDefault;
// read a big-endian 32bit unsigned value from the four bytes at p
static unsigned GetUInt( const unsigned char* p )
{
    // widen each byte to unsigned before shifting: the int-promoted p[0]
    // shifted left by 24 overflows a signed int when its top bit is set
    return (static_cast<unsigned>(p[0]) << 24)
         | (static_cast<unsigned>(p[1]) << 16)
         | (static_cast<unsigned>(p[2]) << 8)
         |  static_cast<unsigned>(p[3]);
}
// read a big-endian 16bit unsigned value from the two bytes at p
static unsigned GetUShort( const unsigned char* p )
{
    const unsigned nHighByte = p[0];
    const unsigned nLowByte = p[1];
    return (nHighByte << 8) | nLowByte;
}
91 static int GetSShort( const unsigned char* p ){ return static_cast<sal_Int16>((p[0]<<8)|p[1]);}
93 // TODO: move CMAP parsing directly into the ImplFontCharMap class
94 bool ParseCMAP( const unsigned char* pCmap, int nLength, CmapResult& rResult )
96 rResult.mpRangeCodes = nullptr;
97 rResult.mpStartGlyphs= nullptr;
98 rResult.mpGlyphIds = nullptr;
99 rResult.mnRangeCount = 0;
100 rResult.mbRecoded = false;
101 rResult.mbSymbolic = false;
103 // parse the table header and check for validity
104 if( !pCmap || (nLength < 24) )
105 return false;
107 if( GetUShort( pCmap ) != 0x0000 ) // simple check for CMAP corruption
108 return false;
110 int nSubTables = GetUShort( pCmap + 2 );
111 if( (nSubTables <= 0) || (nLength < (24 + 8*nSubTables)) )
112 return false;
114 const unsigned char* pEndValidArea = pCmap + nLength;
116 // find the most interesting subtable in the CMAP
117 rtl_TextEncoding eRecodeFrom = RTL_TEXTENCODING_UNICODE;
118 int nOffset = 0;
119 int nFormat = -1;
120 int nBestVal = 0;
121 for( const unsigned char* p = pCmap + 4; --nSubTables >= 0; p += 8 )
123 int nPlatform = GetUShort( p );
124 int nEncoding = GetUShort( p+2 );
125 int nPlatformEncoding = (nPlatform << 8) + nEncoding;
127 int nValue;
128 rtl_TextEncoding eTmpEncoding = RTL_TEXTENCODING_UNICODE;
129 switch( nPlatformEncoding )
131 case 0x000: nValue = 20; break; // Unicode 1.0
132 case 0x001: nValue = 21; break; // Unicode 1.1
133 case 0x002: nValue = 22; break; // iso10646_1993
134 case 0x003: nValue = 23; break; // UCS-2
135 case 0x004: nValue = 24; break; // UCS-4
136 case 0x100: nValue = 22; break; // Mac Unicode<2.0
137 case 0x103: nValue = 23; break; // Mac Unicode>2.0
138 case 0x300: nValue = 5; rResult.mbSymbolic = true; break; // Win Symbol
139 case 0x301: nValue = 28; break; // Win UCS-2
140 case 0x30A: nValue = 29; break; // Win-UCS-4
141 case 0x302: nValue = 11; eTmpEncoding = RTL_TEXTENCODING_SHIFT_JIS; break;
142 case 0x303: nValue = 12; eTmpEncoding = RTL_TEXTENCODING_GB_18030; break;
143 case 0x304: nValue = 11; eTmpEncoding = RTL_TEXTENCODING_BIG5; break;
144 case 0x305: nValue = 11; eTmpEncoding = RTL_TEXTENCODING_MS_949; break;
145 case 0x306: nValue = 11; eTmpEncoding = RTL_TEXTENCODING_MS_1361; break;
146 default: nValue = 0; break;
149 if( nValue <= 0 ) // ignore unknown encodings
150 continue;
152 int nTmpOffset = GetUInt( p+4 );
153 int nTmpFormat = GetUShort( pCmap + nTmpOffset );
154 if( nTmpFormat == 12 ) // 32bit code -> glyph map format
155 nValue += 3;
156 else if( nTmpFormat != 4 ) // 16bit code -> glyph map format
157 continue; // ignore other formats
159 if( nBestVal < nValue )
161 nBestVal = nValue;
162 nOffset = nTmpOffset;
163 nFormat = nTmpFormat;
164 eRecodeFrom = eTmpEncoding;
168 // parse the best CMAP subtable
169 int nRangeCount = 0;
170 sal_UCS4* pCodePairs = nullptr;
171 int* pStartGlyphs = nullptr;
173 std::vector<sal_uInt16> aGlyphIdArray;
174 aGlyphIdArray.reserve( 0x1000 );
175 aGlyphIdArray.push_back( 0 );
177 // format 4, the most common 16bit char mapping table
178 if( (nFormat == 4) && ((nOffset+16) < nLength) )
180 int nSegCountX2 = GetUShort( pCmap + nOffset + 6 );
181 nRangeCount = nSegCountX2/2 - 1;
182 pCodePairs = new sal_UCS4[ nRangeCount * 2 ];
183 pStartGlyphs = new int[ nRangeCount ];
184 const unsigned char* pLimitBase = pCmap + nOffset + 14;
185 const unsigned char* pBeginBase = pLimitBase + nSegCountX2 + 2;
186 const unsigned char* pDeltaBase = pBeginBase + nSegCountX2;
187 const unsigned char* pOffsetBase = pDeltaBase + nSegCountX2;
188 sal_UCS4* pCP = pCodePairs;
189 for( int i = 0; i < nRangeCount; ++i )
191 const sal_UCS4 cMinChar = GetUShort( pBeginBase + 2*i );
192 const sal_UCS4 cMaxChar = GetUShort( pLimitBase + 2*i );
193 const int nGlyphDelta = GetSShort( pDeltaBase + 2*i );
194 const int nRangeOffset = GetUShort( pOffsetBase + 2*i );
195 if( cMinChar > cMaxChar ) { // no sane font should trigger this
196 SAL_WARN("vcl.gdi", "Min char should never be more than the max char!");
197 break;
199 if( cMaxChar == 0xFFFF ) {
200 SAL_WARN("vcl.gdi", "Format 4 char should not be 0xFFFF");
201 break;
203 if( !nRangeOffset ) {
204 // glyphid can be calculated directly
205 pStartGlyphs[i] = (cMinChar + nGlyphDelta) & 0xFFFF;
206 } else {
207 // update the glyphid-array with the glyphs in this range
208 pStartGlyphs[i] = -static_cast<int>(aGlyphIdArray.size());
209 const unsigned char* pGlyphIdPtr = pOffsetBase + 2*i + nRangeOffset;
210 const size_t nRemainingSize = pEndValidArea - pGlyphIdPtr;
211 const size_t nMaxPossibleRecords = nRemainingSize/2;
212 if (nMaxPossibleRecords == 0) { // no sane font should trigger this
213 SAL_WARN("vcl.gdi", "More indexes claimed that space available in font!");
214 break;
216 const size_t nMaxLegalChar = cMinChar + nMaxPossibleRecords-1;
217 if (cMaxChar > nMaxLegalChar) { // no sane font should trigger this
218 SAL_WARN("vcl.gdi", "More indexes claimed that space available in font!");
219 break;
221 for( sal_UCS4 c = cMinChar; c <= cMaxChar; ++c, pGlyphIdPtr+=2 ) {
222 const int nGlyphIndex = GetUShort( pGlyphIdPtr ) + nGlyphDelta;
223 aGlyphIdArray.push_back( static_cast<sal_uInt16>(nGlyphIndex) );
226 *(pCP++) = cMinChar;
227 *(pCP++) = cMaxChar + 1;
229 nRangeCount = (pCP - pCodePairs) / 2;
231 // format 12, the most common 32bit char mapping table
232 else if( (nFormat == 12) && ((nOffset+16) < nLength) )
234 nRangeCount = GetUInt( pCmap + nOffset + 12 );
235 if (nRangeCount < 0)
237 SAL_WARN("vcl.gdi", "negative RangeCount");
238 nRangeCount = 0;
241 const int nGroupOffset = nOffset + 16;
242 const int nRemainingLen = nLength - nGroupOffset;
243 const int nMaxPossiblePairs = nRemainingLen / 12;
244 if (nRangeCount > nMaxPossiblePairs)
246 SAL_WARN("vcl.gdi", "more code pairs requested then space available");
247 nRangeCount = nMaxPossiblePairs;
250 pCodePairs = new sal_UCS4[ nRangeCount * 2 ];
251 pStartGlyphs = new int[ nRangeCount ];
253 const unsigned char* pGroup = pCmap + nGroupOffset;
254 sal_UCS4* pCP = pCodePairs;
255 for( int i = 0; i < nRangeCount; ++i )
257 sal_UCS4 cMinChar = GetUInt( pGroup + 0 );
258 sal_UCS4 cMaxChar = GetUInt( pGroup + 4 );
259 int nGlyphId = GetUInt( pGroup + 8 );
260 pGroup += 12;
262 if( cMinChar > cMaxChar ) { // no sane font should trigger this
263 SAL_WARN("vcl.gdi", "Min char should never be more than the max char!");
264 break;
267 *(pCP++) = cMinChar;
268 *(pCP++) = cMaxChar + 1;
269 pStartGlyphs[i] = nGlyphId;
271 nRangeCount = (pCP - pCodePairs) / 2;
274 // check if any subtable resulted in something usable
275 if( nRangeCount <= 0 )
277 delete[] pCodePairs;
278 delete[] pStartGlyphs;
280 // even when no CMAP is available we know it for symbol fonts
281 if( rResult.mbSymbolic )
283 pCodePairs = new sal_UCS4[4];
284 pCodePairs[0] = 0x0020; // aliased symbols
285 pCodePairs[1] = 0x0100;
286 pCodePairs[2] = 0xF020; // original symbols
287 pCodePairs[3] = 0xF100;
288 rResult.mpRangeCodes = pCodePairs;
289 rResult.mnRangeCount = 2;
290 return true;
293 return false;
296 // recode the code ranges to their unicode encoded ranges if needed
297 rtl_TextToUnicodeConverter aConverter = nullptr;
298 rtl_UnicodeToTextContext aCvtContext = nullptr;
300 rResult.mbRecoded = ( eRecodeFrom != RTL_TEXTENCODING_UNICODE );
301 if( rResult.mbRecoded )
303 aConverter = rtl_createTextToUnicodeConverter( eRecodeFrom );
304 aCvtContext = rtl_createTextToUnicodeContext( aConverter );
307 if( aConverter && aCvtContext )
309 // determine the set of supported code points from encoded ranges
310 std::set<sal_UCS4> aSupportedCodePoints;
312 static const int NINSIZE = 64;
313 static const int NOUTSIZE = 64;
314 sal_Char cCharsInp[ NINSIZE ];
315 sal_Unicode cCharsOut[ NOUTSIZE ];
316 sal_UCS4* pCP = pCodePairs;
317 for( int i = 0; i < nRangeCount; ++i )
319 sal_UCS4 cMin = *(pCP++);
320 sal_UCS4 cEnd = *(pCP++);
321 while( cMin < cEnd )
323 int j = 0;
324 for(; (cMin < cEnd) && (j < NINSIZE); ++cMin )
326 if( cMin >= 0x0100 )
327 cCharsInp[ j++ ] = static_cast<sal_Char>(cMin >> 8);
328 if( (cMin >= 0x0100) || (cMin < 0x00A0) )
329 cCharsInp[ j++ ] = static_cast<sal_Char>(cMin);
332 sal_uInt32 nCvtInfo;
333 sal_Size nSrcCvtBytes;
334 int nOutLen = rtl_convertTextToUnicode(
335 aConverter, aCvtContext,
336 cCharsInp, j, cCharsOut, NOUTSIZE,
337 RTL_TEXTTOUNICODE_FLAGS_INVALID_IGNORE
338 | RTL_TEXTTOUNICODE_FLAGS_UNDEFINED_IGNORE,
339 &nCvtInfo, &nSrcCvtBytes );
341 for( j = 0; j < nOutLen; ++j )
342 aSupportedCodePoints.insert( cCharsOut[j] );
346 rtl_destroyTextToUnicodeConverter( aCvtContext );
347 rtl_destroyTextToUnicodeConverter( aConverter );
349 // convert the set of supported code points to ranges
350 std::vector<sal_UCS4> aSupportedRanges;
352 for (auto const& supportedPoint : aSupportedCodePoints)
354 if( aSupportedRanges.empty()
355 || (aSupportedRanges.back() != supportedPoint) )
357 // add new range beginning with current unicode
358 aSupportedRanges.push_back(supportedPoint);
359 aSupportedRanges.push_back( 0 );
362 // extend existing range to include current unicode
363 aSupportedRanges.back() = supportedPoint + 1;
366 // glyph mapping for non-unicode fonts not implemented
367 delete[] pStartGlyphs;
368 pStartGlyphs = nullptr;
369 aGlyphIdArray.clear();
371 // make a pCodePairs array using the vector from above
372 delete[] pCodePairs;
373 nRangeCount = aSupportedRanges.size() / 2;
374 if( nRangeCount <= 0 )
375 return false;
376 pCodePairs = new sal_UCS4[ nRangeCount * 2 ];
377 pCP = pCodePairs;
378 for (auto const& supportedRange : aSupportedRanges)
379 *(pCP++) = supportedRange;
382 // prepare the glyphid-array if needed
383 // TODO: merge ranges if they are close enough?
384 sal_uInt16* pGlyphIds = nullptr;
385 if( !aGlyphIdArray.empty())
387 pGlyphIds = new sal_uInt16[ aGlyphIdArray.size() ];
388 sal_uInt16* pOut = pGlyphIds;
389 for (auto const& glyphId : aGlyphIdArray)
390 *(pOut++) = glyphId;
393 // update the result struct
394 rResult.mpRangeCodes = pCodePairs;
395 rResult.mpStartGlyphs = pStartGlyphs;
396 rResult.mnRangeCount = nRangeCount;
397 rResult.mpGlyphIds = pGlyphIds;
398 return true;
401 FontCharMap::FontCharMap()
402 : mpImplFontCharMap( ImplFontCharMap::getDefaultMap() )
406 FontCharMap::FontCharMap( ImplFontCharMapRef const & pIFCMap )
407 : mpImplFontCharMap( pIFCMap )
411 FontCharMap::FontCharMap( const CmapResult& rCR )
412 : mpImplFontCharMap(new ImplFontCharMap(rCR))
416 FontCharMap::~FontCharMap()
418 mpImplFontCharMap = nullptr;
421 FontCharMapRef FontCharMap::GetDefaultMap( bool bSymbol )
423 FontCharMapRef xFontCharMap( new FontCharMap( ImplFontCharMap::getDefaultMap( bSymbol ) ) );
424 return xFontCharMap;
427 bool FontCharMap::IsDefaultMap() const
429 return mpImplFontCharMap->isDefaultMap();
432 int FontCharMap::GetCharCount() const
434 return mpImplFontCharMap->mnCharCount;
437 int FontCharMap::CountCharsInRange( sal_UCS4 cMin, sal_UCS4 cMax ) const
439 int nCount = 0;
441 // find and adjust range and char count for cMin
442 int nRangeMin = findRangeIndex( cMin );
443 if( nRangeMin & 1 )
444 ++nRangeMin;
445 else if( cMin > mpImplFontCharMap->mpRangeCodes[ nRangeMin ] )
446 nCount -= cMin - mpImplFontCharMap->mpRangeCodes[ nRangeMin ];
448 // find and adjust range and char count for cMax
449 int nRangeMax = findRangeIndex( cMax );
450 if( nRangeMax & 1 )
451 --nRangeMax;
452 else
453 nCount -= mpImplFontCharMap->mpRangeCodes[ nRangeMax+1 ] - cMax - 1;
455 // count chars in complete ranges between cMin and cMax
456 for( int i = nRangeMin; i <= nRangeMax; i+=2 )
457 nCount += mpImplFontCharMap->mpRangeCodes[i+1] - mpImplFontCharMap->mpRangeCodes[i];
459 return nCount;
462 bool FontCharMap::HasChar( sal_UCS4 cChar ) const
464 bool bHasChar = false;
466 if( mpImplFontCharMap->mpStartGlyphs == nullptr ) { // only the char-ranges are known
467 const int nRange = findRangeIndex( cChar );
468 if( nRange==0 && cChar < mpImplFontCharMap->mpRangeCodes[0] )
469 return false;
470 bHasChar = ((nRange & 1) == 0); // inside a range
471 } else { // glyph mapping is available
472 const int nGlyphIndex = GetGlyphIndex( cChar );
473 bHasChar = (nGlyphIndex != 0); // not the notdef-glyph
476 return bHasChar;
479 sal_UCS4 FontCharMap::GetFirstChar() const
481 return mpImplFontCharMap->mpRangeCodes[0];
484 sal_UCS4 FontCharMap::GetLastChar() const
486 return (mpImplFontCharMap->mpRangeCodes[ 2*mpImplFontCharMap->mnRangeCount-1 ] - 1);
489 sal_UCS4 FontCharMap::GetNextChar( sal_UCS4 cChar ) const
491 if( cChar < GetFirstChar() )
492 return GetFirstChar();
493 if( cChar >= GetLastChar() )
494 return GetLastChar();
496 int nRange = findRangeIndex( cChar + 1 );
497 if( nRange & 1 ) // outside of range?
498 return mpImplFontCharMap->mpRangeCodes[ nRange + 1 ]; // => first in next range
499 return (cChar + 1);
502 sal_UCS4 FontCharMap::GetPrevChar( sal_UCS4 cChar ) const
504 if( cChar <= GetFirstChar() )
505 return GetFirstChar();
506 if( cChar > GetLastChar() )
507 return GetLastChar();
509 int nRange = findRangeIndex( cChar - 1 );
510 if( nRange & 1 ) // outside a range?
511 return (mpImplFontCharMap->mpRangeCodes[ nRange ] - 1); // => last in prev range
512 return (cChar - 1);
515 int FontCharMap::GetIndexFromChar( sal_UCS4 cChar ) const
517 // TODO: improve linear walk?
518 int nCharIndex = 0;
519 const sal_UCS4* pRange = &mpImplFontCharMap->mpRangeCodes[0];
520 for( int i = 0; i < mpImplFontCharMap->mnRangeCount; ++i )
522 sal_UCS4 cFirst = *(pRange++);
523 sal_UCS4 cLast = *(pRange++);
524 if( cChar >= cLast )
525 nCharIndex += cLast - cFirst;
526 else if( cChar >= cFirst )
527 return nCharIndex + (cChar - cFirst);
528 else
529 break;
532 return -1;
535 sal_UCS4 FontCharMap::GetCharFromIndex( int nIndex ) const
537 // TODO: improve linear walk?
538 const sal_UCS4* pRange = &mpImplFontCharMap->mpRangeCodes[0];
539 for( int i = 0; i < mpImplFontCharMap->mnRangeCount; ++i )
541 sal_UCS4 cFirst = *(pRange++);
542 sal_UCS4 cLast = *(pRange++);
543 nIndex -= cLast - cFirst;
544 if( nIndex < 0 )
545 return (cLast + nIndex);
548 // we can only get here with an out-of-bounds charindex
549 return mpImplFontCharMap->mpRangeCodes[0];
552 int FontCharMap::findRangeIndex( sal_UCS4 cChar ) const
554 int nLower = 0;
555 int nMid = mpImplFontCharMap->mnRangeCount;
556 int nUpper = 2 * mpImplFontCharMap->mnRangeCount - 1;
557 while( nLower < nUpper )
559 if( cChar >= mpImplFontCharMap->mpRangeCodes[ nMid ] )
560 nLower = nMid;
561 else
562 nUpper = nMid - 1;
563 nMid = (nLower + nUpper + 1) / 2;
566 return nMid;
569 int FontCharMap::GetGlyphIndex( sal_UCS4 cChar ) const
571 // return -1 if the object doesn't know the glyph ids
572 if( !mpImplFontCharMap->mpStartGlyphs )
573 return -1;
575 // return 0 if the unicode doesn't have a matching glyph
576 int nRange = findRangeIndex( cChar );
577 // check that we are inside any range
578 if( (nRange == 0) && (cChar < mpImplFontCharMap->mpRangeCodes[0]) ) {
579 // symbol aliasing gives symbol fonts a second chance
580 const bool bSymbolic = cChar <= 0xFF && (mpImplFontCharMap->mpRangeCodes[0]>=0xF000) &&
581 (mpImplFontCharMap->mpRangeCodes[1]<=0xF0FF);
582 if( !bSymbolic )
583 return 0;
584 // check for symbol aliasing (U+F0xx -> U+00xx)
585 cChar |= 0xF000;
586 nRange = findRangeIndex( cChar );
587 if( (nRange == 0) && (cChar < mpImplFontCharMap->mpRangeCodes[0]) ) {
588 return 0;
591 // check that we are inside a range
592 if( (nRange & 1) != 0 )
593 return 0;
595 // get glyph index directly or indirectly
596 int nGlyphIndex = cChar - mpImplFontCharMap->mpRangeCodes[ nRange ];
597 const int nStartIndex = mpImplFontCharMap->mpStartGlyphs[ nRange/2 ];
598 if( nStartIndex >= 0 ) {
599 // the glyph index can be calculated
600 nGlyphIndex += nStartIndex;
601 } else {
602 // the glyphid array has the glyph index
603 nGlyphIndex = mpImplFontCharMap->mpGlyphIds[ nGlyphIndex - nStartIndex];
606 return nGlyphIndex;
609 /* vim:set shiftwidth=4 softtabstop=4 expandtab: */