Version 5.4.3.2, tag libreoffice-5.4.3.2
[LibreOffice.git] / vcl / source / font / fontcharmap.cxx
blobf30c4e6702421f73b99cd95afb9caf63339541eb
/*
 * This file is part of the LibreOffice project.
 *
 * This Source Code Form is subject to the terms of the Mozilla Public
 * License, v. 2.0. If a copy of the MPL was not distributed with this
 * file, You can obtain one at http://mozilla.org/MPL/2.0/.
 *
 * This file incorporates work covered by the following license notice:
 *
 *   Licensed to the Apache Software Foundation (ASF) under one or more
 *   contributor license agreements. See the NOTICE file distributed
 *   with this work for additional information regarding copyright
 *   ownership. The ASF licenses this file to you under the Apache
 *   License, Version 2.0 (the "License"); you may not use this file
 *   except in compliance with the License. You may obtain a copy of
 *   the License at http://www.apache.org/licenses/LICENSE-2.0 .
 */
18 #include <vcl/fontcharmap.hxx>
19 #include <fontinstance.hxx>
20 #include <impfontcharmap.hxx>
22 #include <vector>
23 #include <set>
25 CmapResult::CmapResult( bool bSymbolic,
26 const sal_UCS4* pRangeCodes, int nRangeCount )
27 : mpRangeCodes( pRangeCodes)
28 , mpStartGlyphs( nullptr)
29 , mpGlyphIds( nullptr)
30 , mnRangeCount( nRangeCount)
31 , mbSymbolic( bSymbolic)
32 , mbRecoded( false)
35 static ImplFontCharMapRef xDefaultImplFontCharMap;
36 static const sal_UCS4 aDefaultUnicodeRanges[] = {0x0020,0xD800, 0xE000,0xFFF0};
37 static const sal_UCS4 aDefaultSymbolRanges[] = {0x0020,0x0100, 0xF020,0xF100};
39 ImplFontCharMap::~ImplFontCharMap()
41 if( isDefaultMap() )
42 return;
43 delete[] mpRangeCodes;
44 delete[] mpStartGlyphs;
45 delete[] mpGlyphIds;
48 ImplFontCharMap::ImplFontCharMap( const CmapResult& rCR )
49 : mpRangeCodes( rCR.mpRangeCodes )
50 , mpStartGlyphs( rCR.mpStartGlyphs )
51 , mpGlyphIds( rCR.mpGlyphIds )
52 , mnRangeCount( rCR.mnRangeCount )
53 , mnCharCount( 0 )
55 const sal_UCS4* pRangePtr = mpRangeCodes;
56 for( int i = mnRangeCount; --i >= 0; pRangePtr += 2 )
58 sal_UCS4 cFirst = pRangePtr[0];
59 sal_UCS4 cLast = pRangePtr[1];
60 mnCharCount += cLast - cFirst;
64 ImplFontCharMapRef const & ImplFontCharMap::getDefaultMap( bool bSymbols )
66 const sal_UCS4* pRangeCodes = aDefaultUnicodeRanges;
67 int nCodesCount = sizeof(aDefaultUnicodeRanges) / sizeof(*pRangeCodes);
68 if( bSymbols )
70 pRangeCodes = aDefaultSymbolRanges;
71 nCodesCount = sizeof(aDefaultSymbolRanges) / sizeof(*pRangeCodes);
74 CmapResult aDefaultCR( bSymbols, pRangeCodes, nCodesCount/2 );
75 xDefaultImplFontCharMap = ImplFontCharMapRef(new ImplFontCharMap(aDefaultCR));
77 return xDefaultImplFontCharMap;
80 bool ImplFontCharMap::isDefaultMap() const
82 const bool bIsDefault = (mpRangeCodes == aDefaultUnicodeRanges) || (mpRangeCodes == aDefaultSymbolRanges);
83 return bIsDefault;
// Reads a big-endian 32-bit unsigned value from the four bytes at p.
static unsigned GetUInt( const unsigned char* p )
{
    // widen each byte to unsigned before shifting: p[0] would otherwise be
    // promoted to a signed int, and shifting a value >= 0x80 left by 24 bits
    // moves a bit into the sign position
    return (static_cast<unsigned>(p[0]) << 24)
         | (static_cast<unsigned>(p[1]) << 16)
         | (static_cast<unsigned>(p[2]) << 8)
         |  static_cast<unsigned>(p[3]);
}
// Reads a big-endian 16-bit unsigned value from the two bytes at p.
static unsigned Getsal_uInt16( const unsigned char* p )
{
    const unsigned nHighByte = p[0];
    const unsigned nLowByte = p[1];
    return (nHighByte << 8) | nLowByte;
}
88 static int GetSShort( const unsigned char* p ){ return static_cast<sal_Int16>((p[0]<<8)|p[1]);}
90 // TODO: move CMAP parsing directly into the ImplFontCharMap class
91 bool ParseCMAP( const unsigned char* pCmap, int nLength, CmapResult& rResult )
93 rResult.mpRangeCodes = nullptr;
94 rResult.mpStartGlyphs= nullptr;
95 rResult.mpGlyphIds = nullptr;
96 rResult.mnRangeCount = 0;
97 rResult.mbRecoded = false;
98 rResult.mbSymbolic = false;
100 // parse the table header and check for validity
101 if( !pCmap || (nLength < 24) )
102 return false;
104 if( Getsal_uInt16( pCmap ) != 0x0000 ) // simple check for CMAP corruption
105 return false;
107 int nSubTables = Getsal_uInt16( pCmap + 2 );
108 if( (nSubTables <= 0) || (nLength < (24 + 8*nSubTables)) )
109 return false;
111 const unsigned char* pEndValidArea = pCmap + nLength;
113 // find the most interesting subtable in the CMAP
114 rtl_TextEncoding eRecodeFrom = RTL_TEXTENCODING_UNICODE;
115 int nOffset = 0;
116 int nFormat = -1;
117 int nBestVal = 0;
118 for( const unsigned char* p = pCmap + 4; --nSubTables >= 0; p += 8 )
120 int nPlatform = Getsal_uInt16( p );
121 int nEncoding = Getsal_uInt16( p+2 );
122 int nPlatformEncoding = (nPlatform << 8) + nEncoding;
124 int nValue;
125 rtl_TextEncoding eTmpEncoding = RTL_TEXTENCODING_UNICODE;
126 switch( nPlatformEncoding )
128 case 0x000: nValue = 20; break; // Unicode 1.0
129 case 0x001: nValue = 21; break; // Unicode 1.1
130 case 0x002: nValue = 22; break; // iso10646_1993
131 case 0x003: nValue = 23; break; // UCS-2
132 case 0x004: nValue = 24; break; // UCS-4
133 case 0x100: nValue = 22; break; // Mac Unicode<2.0
134 case 0x103: nValue = 23; break; // Mac Unicode>2.0
135 case 0x300: nValue = 5; rResult.mbSymbolic = true; break; // Win Symbol
136 case 0x301: nValue = 28; break; // Win UCS-2
137 case 0x30A: nValue = 29; break; // Win-UCS-4
138 case 0x302: nValue = 11; eTmpEncoding = RTL_TEXTENCODING_SHIFT_JIS; break;
139 case 0x303: nValue = 12; eTmpEncoding = RTL_TEXTENCODING_GB_18030; break;
140 case 0x304: nValue = 11; eTmpEncoding = RTL_TEXTENCODING_BIG5; break;
141 case 0x305: nValue = 11; eTmpEncoding = RTL_TEXTENCODING_MS_949; break;
142 case 0x306: nValue = 11; eTmpEncoding = RTL_TEXTENCODING_MS_1361; break;
143 default: nValue = 0; break;
146 if( nValue <= 0 ) // ignore unknown encodings
147 continue;
149 int nTmpOffset = GetUInt( p+4 );
150 int nTmpFormat = Getsal_uInt16( pCmap + nTmpOffset );
151 if( nTmpFormat == 12 ) // 32bit code -> glyph map format
152 nValue += 3;
153 else if( nTmpFormat != 4 ) // 16bit code -> glyph map format
154 continue; // ignore other formats
156 if( nBestVal < nValue )
158 nBestVal = nValue;
159 nOffset = nTmpOffset;
160 nFormat = nTmpFormat;
161 eRecodeFrom = eTmpEncoding;
165 // parse the best CMAP subtable
166 int nRangeCount = 0;
167 sal_UCS4* pCodePairs = nullptr;
168 int* pStartGlyphs = nullptr;
170 std::vector<sal_uInt16> aGlyphIdArray;
171 aGlyphIdArray.reserve( 0x1000 );
172 aGlyphIdArray.push_back( 0 );
174 // format 4, the most common 16bit char mapping table
175 if( (nFormat == 4) && ((nOffset+16) < nLength) )
177 int nSegCountX2 = Getsal_uInt16( pCmap + nOffset + 6 );
178 nRangeCount = nSegCountX2/2 - 1;
179 pCodePairs = new sal_UCS4[ nRangeCount * 2 ];
180 pStartGlyphs = new int[ nRangeCount ];
181 const unsigned char* pLimitBase = pCmap + nOffset + 14;
182 const unsigned char* pBeginBase = pLimitBase + nSegCountX2 + 2;
183 const unsigned char* pDeltaBase = pBeginBase + nSegCountX2;
184 const unsigned char* pOffsetBase = pDeltaBase + nSegCountX2;
185 sal_UCS4* pCP = pCodePairs;
186 for( int i = 0; i < nRangeCount; ++i )
188 const sal_UCS4 cMinChar = Getsal_uInt16( pBeginBase + 2*i );
189 const sal_UCS4 cMaxChar = Getsal_uInt16( pLimitBase + 2*i );
190 const int nGlyphDelta = GetSShort( pDeltaBase + 2*i );
191 const int nRangeOffset = Getsal_uInt16( pOffsetBase + 2*i );
192 if( cMinChar > cMaxChar ) { // no sane font should trigger this
193 SAL_WARN("vcl.gdi", "Min char should never be more than the max char!");
194 break;
196 if( cMaxChar == 0xFFFF ) {
197 SAL_WARN("vcl.gdi", "Format 4 char should not be 0xFFFF");
198 break;
200 if( !nRangeOffset ) {
201 // glyphid can be calculated directly
202 pStartGlyphs[i] = (cMinChar + nGlyphDelta) & 0xFFFF;
203 } else {
204 // update the glyphid-array with the glyphs in this range
205 pStartGlyphs[i] = -(int)aGlyphIdArray.size();
206 const unsigned char* pGlyphIdPtr = pOffsetBase + 2*i + nRangeOffset;
207 const size_t nRemainingSize = pEndValidArea - pGlyphIdPtr;
208 const size_t nMaxPossibleRecords = nRemainingSize/2;
209 if (nMaxPossibleRecords == 0) { // no sane font should trigger this
210 SAL_WARN("vcl.gdi", "More indexes claimed that space available in font!");
211 break;
213 const size_t nMaxLegalChar = cMinChar + nMaxPossibleRecords-1;
214 if (cMaxChar > nMaxLegalChar) { // no sane font should trigger this
215 SAL_WARN("vcl.gdi", "More indexes claimed that space available in font!");
216 break;
218 for( sal_UCS4 c = cMinChar; c <= cMaxChar; ++c, pGlyphIdPtr+=2 ) {
219 const int nGlyphIndex = Getsal_uInt16( pGlyphIdPtr ) + nGlyphDelta;
220 aGlyphIdArray.push_back( static_cast<sal_uInt16>(nGlyphIndex) );
223 *(pCP++) = cMinChar;
224 *(pCP++) = cMaxChar + 1;
226 nRangeCount = (pCP - pCodePairs) / 2;
228 // format 12, the most common 32bit char mapping table
229 else if( (nFormat == 12) && ((nOffset+16) < nLength) )
231 nRangeCount = GetUInt( pCmap + nOffset + 12 );
232 pCodePairs = new sal_UCS4[ nRangeCount * 2 ];
233 pStartGlyphs = new int[ nRangeCount ];
234 const unsigned char* pGroup = pCmap + nOffset + 16;
235 sal_UCS4* pCP = pCodePairs;
236 for( int i = 0; i < nRangeCount; ++i )
238 sal_UCS4 cMinChar = GetUInt( pGroup + 0 );
239 sal_UCS4 cMaxChar = GetUInt( pGroup + 4 );
240 int nGlyphId = GetUInt( pGroup + 8 );
241 pGroup += 12;
243 if( cMinChar > cMaxChar ) { // no sane font should trigger this
244 SAL_WARN("vcl.gdi", "Min char should never be more than the max char!");
245 break;
248 *(pCP++) = cMinChar;
249 *(pCP++) = cMaxChar + 1;
250 pStartGlyphs[i] = nGlyphId;
252 nRangeCount = (pCP - pCodePairs) / 2;
255 // check if any subtable resulted in something usable
256 if( nRangeCount <= 0 )
258 delete[] pCodePairs;
259 delete[] pStartGlyphs;
261 // even when no CMAP is available we know it for symbol fonts
262 if( rResult.mbSymbolic )
264 pCodePairs = new sal_UCS4[4];
265 pCodePairs[0] = 0x0020; // aliased symbols
266 pCodePairs[1] = 0x0100;
267 pCodePairs[2] = 0xF020; // original symbols
268 pCodePairs[3] = 0xF100;
269 rResult.mpRangeCodes = pCodePairs;
270 rResult.mnRangeCount = 2;
271 return true;
274 return false;
277 // recode the code ranges to their unicode encoded ranges if needed
278 rtl_TextToUnicodeConverter aConverter = nullptr;
279 rtl_UnicodeToTextContext aCvtContext = nullptr;
281 rResult.mbRecoded = ( eRecodeFrom != RTL_TEXTENCODING_UNICODE );
282 if( rResult.mbRecoded )
284 aConverter = rtl_createTextToUnicodeConverter( eRecodeFrom );
285 aCvtContext = rtl_createTextToUnicodeContext( aConverter );
288 if( aConverter && aCvtContext )
290 // determine the set of supported code points from encoded ranges
291 std::set<sal_UCS4> aSupportedCodePoints;
293 static const int NINSIZE = 64;
294 static const int NOUTSIZE = 64;
295 sal_Char cCharsInp[ NINSIZE ];
296 sal_Unicode cCharsOut[ NOUTSIZE ];
297 sal_UCS4* pCP = pCodePairs;
298 for( int i = 0; i < nRangeCount; ++i )
300 sal_UCS4 cMin = *(pCP++);
301 sal_UCS4 cEnd = *(pCP++);
302 while( cMin < cEnd )
304 int j = 0;
305 for(; (cMin < cEnd) && (j < NINSIZE); ++cMin )
307 if( cMin >= 0x0100 )
308 cCharsInp[ j++ ] = static_cast<sal_Char>(cMin >> 8);
309 if( (cMin >= 0x0100) || (cMin < 0x00A0) )
310 cCharsInp[ j++ ] = static_cast<sal_Char>(cMin);
313 sal_uInt32 nCvtInfo;
314 sal_Size nSrcCvtBytes;
315 int nOutLen = rtl_convertTextToUnicode(
316 aConverter, aCvtContext,
317 cCharsInp, j, cCharsOut, NOUTSIZE,
318 RTL_TEXTTOUNICODE_FLAGS_INVALID_IGNORE
319 | RTL_TEXTTOUNICODE_FLAGS_UNDEFINED_IGNORE,
320 &nCvtInfo, &nSrcCvtBytes );
322 for( j = 0; j < nOutLen; ++j )
323 aSupportedCodePoints.insert( cCharsOut[j] );
327 rtl_destroyTextToUnicodeConverter( aCvtContext );
328 rtl_destroyTextToUnicodeConverter( aConverter );
330 // convert the set of supported code points to ranges
331 std::vector<sal_UCS4> aSupportedRanges;
333 std::set<sal_UCS4>::const_iterator itChar = aSupportedCodePoints.begin();
334 for(; itChar != aSupportedCodePoints.end(); ++itChar )
336 if( aSupportedRanges.empty()
337 || (aSupportedRanges.back() != *itChar) )
339 // add new range beginning with current unicode
340 aSupportedRanges.push_back( *itChar );
341 aSupportedRanges.push_back( 0 );
344 // extend existing range to include current unicode
345 aSupportedRanges.back() = *itChar + 1;
348 // glyph mapping for non-unicode fonts not implemented
349 delete[] pStartGlyphs;
350 pStartGlyphs = nullptr;
351 aGlyphIdArray.clear();
353 // make a pCodePairs array using the vector from above
354 delete[] pCodePairs;
355 nRangeCount = aSupportedRanges.size() / 2;
356 if( nRangeCount <= 0 )
357 return false;
358 pCodePairs = new sal_UCS4[ nRangeCount * 2 ];
359 std::vector<sal_UCS4>::const_iterator itInt = aSupportedRanges.begin();
360 for( pCP = pCodePairs; itInt != aSupportedRanges.end(); ++itInt )
361 *(pCP++) = *itInt;
364 // prepare the glyphid-array if needed
365 // TODO: merge ranges if they are close enough?
366 sal_uInt16* pGlyphIds = nullptr;
367 if( !aGlyphIdArray.empty())
369 pGlyphIds = new sal_uInt16[ aGlyphIdArray.size() ];
370 sal_uInt16* pOut = pGlyphIds;
371 std::vector<sal_uInt16>::const_iterator it = aGlyphIdArray.begin();
372 while( it != aGlyphIdArray.end() )
373 *(pOut++) = *(it++);
376 // update the result struct
377 rResult.mpRangeCodes = pCodePairs;
378 rResult.mpStartGlyphs = pStartGlyphs;
379 rResult.mnRangeCount = nRangeCount;
380 rResult.mpGlyphIds = pGlyphIds;
381 return true;
384 FontCharMap::FontCharMap()
385 : mpImplFontCharMap( ImplFontCharMap::getDefaultMap() )
389 FontCharMap::FontCharMap( ImplFontCharMapRef const & pIFCMap )
390 : mpImplFontCharMap( pIFCMap )
394 FontCharMap::FontCharMap( const CmapResult& rCR )
395 : mpImplFontCharMap(new ImplFontCharMap(rCR))
399 FontCharMap::~FontCharMap()
401 mpImplFontCharMap = nullptr;
404 FontCharMapRef FontCharMap::GetDefaultMap( bool bSymbol )
406 FontCharMapRef xFontCharMap( new FontCharMap( ImplFontCharMap::getDefaultMap( bSymbol ) ) );
407 return xFontCharMap;
410 bool FontCharMap::IsDefaultMap() const
412 return mpImplFontCharMap->isDefaultMap();
415 int FontCharMap::GetCharCount() const
417 return mpImplFontCharMap->mnCharCount;
420 int FontCharMap::CountCharsInRange( sal_UCS4 cMin, sal_UCS4 cMax ) const
422 int nCount = 0;
424 // find and adjust range and char count for cMin
425 int nRangeMin = findRangeIndex( cMin );
426 if( nRangeMin & 1 )
427 ++nRangeMin;
428 else if( cMin > mpImplFontCharMap->mpRangeCodes[ nRangeMin ] )
429 nCount -= cMin - mpImplFontCharMap->mpRangeCodes[ nRangeMin ];
431 // find and adjust range and char count for cMax
432 int nRangeMax = findRangeIndex( cMax );
433 if( nRangeMax & 1 )
434 --nRangeMax;
435 else
436 nCount -= mpImplFontCharMap->mpRangeCodes[ nRangeMax+1 ] - cMax - 1;
438 // count chars in complete ranges between cMin and cMax
439 for( int i = nRangeMin; i <= nRangeMax; i+=2 )
440 nCount += mpImplFontCharMap->mpRangeCodes[i+1] - mpImplFontCharMap->mpRangeCodes[i];
442 return nCount;
445 bool FontCharMap::HasChar( sal_UCS4 cChar ) const
447 bool bHasChar = false;
449 if( mpImplFontCharMap->mpStartGlyphs == nullptr ) { // only the char-ranges are known
450 const int nRange = findRangeIndex( cChar );
451 if( nRange==0 && cChar < mpImplFontCharMap->mpRangeCodes[0] )
452 return false;
453 bHasChar = ((nRange & 1) == 0); // inside a range
454 } else { // glyph mapping is available
455 const int nGlyphIndex = GetGlyphIndex( cChar );
456 bHasChar = (nGlyphIndex != 0); // not the notdef-glyph
459 return bHasChar;
462 sal_UCS4 FontCharMap::GetFirstChar() const
464 return mpImplFontCharMap->mpRangeCodes[0];
467 sal_UCS4 FontCharMap::GetLastChar() const
469 return (mpImplFontCharMap->mpRangeCodes[ 2*mpImplFontCharMap->mnRangeCount-1 ] - 1);
472 sal_UCS4 FontCharMap::GetNextChar( sal_UCS4 cChar ) const
474 if( cChar < GetFirstChar() )
475 return GetFirstChar();
476 if( cChar >= GetLastChar() )
477 return GetLastChar();
479 int nRange = findRangeIndex( cChar + 1 );
480 if( nRange & 1 ) // outside of range?
481 return mpImplFontCharMap->mpRangeCodes[ nRange + 1 ]; // => first in next range
482 return (cChar + 1);
485 sal_UCS4 FontCharMap::GetPrevChar( sal_UCS4 cChar ) const
487 if( cChar <= GetFirstChar() )
488 return GetFirstChar();
489 if( cChar > GetLastChar() )
490 return GetLastChar();
492 int nRange = findRangeIndex( cChar - 1 );
493 if( nRange & 1 ) // outside a range?
494 return (mpImplFontCharMap->mpRangeCodes[ nRange ] - 1); // => last in prev range
495 return (cChar - 1);
498 int FontCharMap::GetIndexFromChar( sal_UCS4 cChar ) const
500 // TODO: improve linear walk?
501 int nCharIndex = 0;
502 const sal_UCS4* pRange = &mpImplFontCharMap->mpRangeCodes[0];
503 for( int i = 0; i < mpImplFontCharMap->mnRangeCount; ++i )
505 sal_UCS4 cFirst = *(pRange++);
506 sal_UCS4 cLast = *(pRange++);
507 if( cChar >= cLast )
508 nCharIndex += cLast - cFirst;
509 else if( cChar >= cFirst )
510 return nCharIndex + (cChar - cFirst);
511 else
512 break;
515 return -1;
518 sal_UCS4 FontCharMap::GetCharFromIndex( int nIndex ) const
520 // TODO: improve linear walk?
521 const sal_UCS4* pRange = &mpImplFontCharMap->mpRangeCodes[0];
522 for( int i = 0; i < mpImplFontCharMap->mnRangeCount; ++i )
524 sal_UCS4 cFirst = *(pRange++);
525 sal_UCS4 cLast = *(pRange++);
526 nIndex -= cLast - cFirst;
527 if( nIndex < 0 )
528 return (cLast + nIndex);
531 // we can only get here with an out-of-bounds charindex
532 return mpImplFontCharMap->mpRangeCodes[0];
535 int FontCharMap::findRangeIndex( sal_UCS4 cChar ) const
537 int nLower = 0;
538 int nMid = mpImplFontCharMap->mnRangeCount;
539 int nUpper = 2 * mpImplFontCharMap->mnRangeCount - 1;
540 while( nLower < nUpper )
542 if( cChar >= mpImplFontCharMap->mpRangeCodes[ nMid ] )
543 nLower = nMid;
544 else
545 nUpper = nMid - 1;
546 nMid = (nLower + nUpper + 1) / 2;
549 return nMid;
552 int FontCharMap::GetGlyphIndex( sal_UCS4 cChar ) const
554 // return -1 if the object doesn't know the glyph ids
555 if( !mpImplFontCharMap->mpStartGlyphs )
556 return -1;
558 // return 0 if the unicode doesn't have a matching glyph
559 int nRange = findRangeIndex( cChar );
560 // check that we are inside any range
561 if( (nRange == 0) && (cChar < mpImplFontCharMap->mpRangeCodes[0]) ) {
562 // symbol aliasing gives symbol fonts a second chance
563 const bool bSymbolic = cChar <= 0xFF && (mpImplFontCharMap->mpRangeCodes[0]>=0xF000) &&
564 (mpImplFontCharMap->mpRangeCodes[1]<=0xF0FF);
565 if( !bSymbolic )
566 return 0;
567 // check for symbol aliasing (U+F0xx -> U+00xx)
568 cChar |= 0xF000;
569 nRange = findRangeIndex( cChar );
570 if( (nRange == 0) && (cChar < mpImplFontCharMap->mpRangeCodes[0]) ) {
571 return 0;
574 // check that we are inside a range
575 if( (nRange & 1) != 0 )
576 return 0;
578 // get glyph index directly or indirectly
579 int nGlyphIndex = cChar - mpImplFontCharMap->mpRangeCodes[ nRange ];
580 const int nStartIndex = mpImplFontCharMap->mpStartGlyphs[ nRange/2 ];
581 if( nStartIndex >= 0 ) {
582 // the glyph index can be calculated
583 nGlyphIndex += nStartIndex;
584 } else {
585 // the glyphid array has the glyph index
586 nGlyphIndex = mpImplFontCharMap->mpGlyphIds[ nGlyphIndex - nStartIndex];
589 return nGlyphIndex;
592 /* vim:set shiftwidth=4 softtabstop=4 expandtab: */