build fix: no comphelper/profilezone.hxx in this branch
[LibreOffice.git] / vcl / source / font / fontcharmap.cxx
blob273894641615563af82d78b5a79b2628830067cd
1 /*
2 * This file is part of the LibreOffice project.
4 * This Source Code Form is subject to the terms of the Mozilla Public
5 * License, v. 2.0. If a copy of the MPL was not distributed with this
6 * file, You can obtain one at http://mozilla.org/MPL/2.0/.
8 * This file incorporates work covered by the following license notice:
10 * Licensed to the Apache Software Foundation (ASF) under one or more
11 * contributor license agreements. See the NOTICE file distributed
12 * with this work for additional information regarding copyright
13 * ownership. The ASF licenses this file to you under the Apache
14 * License, Version 2.0 (the "License"); you may not use this file
15 * except in compliance with the License. You may obtain a copy of
16 * the License at http://www.apache.org/licenses/LICENSE-2.0 .
18 #include <vcl/fontcharmap.hxx>
19 #include <fontinstance.hxx>
20 #include <impfontcharmap.hxx>
22 #include <vector>
23 #include <set>
25 CmapResult::CmapResult( bool bSymbolic,
26 const sal_UCS4* pRangeCodes, int nRangeCount )
27 : mpRangeCodes( pRangeCodes)
28 , mpStartGlyphs( nullptr)
29 , mpGlyphIds( nullptr)
30 , mnRangeCount( nRangeCount)
31 , mbSymbolic( bSymbolic)
32 , mbRecoded( false)
35 static ImplFontCharMapRef xDefaultImplFontCharMap;
36 static const sal_UCS4 aDefaultUnicodeRanges[] = {0x0020,0xD800, 0xE000,0xFFF0};
37 static const sal_UCS4 aDefaultSymbolRanges[] = {0x0020,0x0100, 0xF020,0xF100};
39 ImplFontCharMap::~ImplFontCharMap()
41 if( !isDefaultMap() )
43 delete[] mpRangeCodes;
44 delete[] mpStartGlyphs;
45 delete[] mpGlyphIds;
49 ImplFontCharMap::ImplFontCharMap( const CmapResult& rCR )
50 : mpRangeCodes( rCR.mpRangeCodes )
51 , mpStartGlyphs( rCR.mpStartGlyphs )
52 , mpGlyphIds( rCR.mpGlyphIds )
53 , mnRangeCount( rCR.mnRangeCount )
54 , mnCharCount( 0 )
56 const sal_UCS4* pRangePtr = mpRangeCodes;
57 for( int i = mnRangeCount; --i >= 0; pRangePtr += 2 )
59 sal_UCS4 cFirst = pRangePtr[0];
60 sal_UCS4 cLast = pRangePtr[1];
61 mnCharCount += cLast - cFirst;
65 ImplFontCharMapRef const & ImplFontCharMap::getDefaultMap( bool bSymbols )
67 const sal_UCS4* pRangeCodes = aDefaultUnicodeRanges;
68 int nCodesCount = sizeof(aDefaultUnicodeRanges) / sizeof(*pRangeCodes);
69 if( bSymbols )
71 pRangeCodes = aDefaultSymbolRanges;
72 nCodesCount = sizeof(aDefaultSymbolRanges) / sizeof(*pRangeCodes);
75 CmapResult aDefaultCR( bSymbols, pRangeCodes, nCodesCount/2 );
76 xDefaultImplFontCharMap = ImplFontCharMapRef(new ImplFontCharMap(aDefaultCR));
78 return xDefaultImplFontCharMap;
81 bool ImplFontCharMap::isDefaultMap() const
83 const bool bIsDefault = (mpRangeCodes == aDefaultUnicodeRanges) || (mpRangeCodes == aDefaultSymbolRanges);
84 return bIsDefault;
// Reads a big-endian 32-bit unsigned value from the four bytes at p.
// Each byte is widened to unsigned before shifting so that a leading byte
// >= 0x80 is never shifted into the sign bit of a promoted int.
static unsigned GetUInt( const unsigned char* p )
{
    return (static_cast<unsigned>(p[0]) << 24)
         | (static_cast<unsigned>(p[1]) << 16)
         | (static_cast<unsigned>(p[2]) << 8)
         |  static_cast<unsigned>(p[3]);
}
// Reads a big-endian 16-bit unsigned value from the two bytes at p.
static unsigned Getsal_uInt16( const unsigned char* p )
{
    const int nHighByte = p[0] << 8;
    const int nLowByte = p[1];
    return nHighByte | nLowByte;
}
89 static int GetSShort( const unsigned char* p ){ return static_cast<sal_Int16>((p[0]<<8)|p[1]);}
91 // TODO: move CMAP parsing directly into the ImplFontCharMap class
92 bool ParseCMAP( const unsigned char* pCmap, int nLength, CmapResult& rResult )
94 rResult.mpRangeCodes = nullptr;
95 rResult.mpStartGlyphs= nullptr;
96 rResult.mpGlyphIds = nullptr;
97 rResult.mnRangeCount = 0;
98 rResult.mbRecoded = false;
99 rResult.mbSymbolic = false;
101 // parse the table header and check for validity
102 if( !pCmap || (nLength < 24) )
103 return false;
105 if( Getsal_uInt16( pCmap ) != 0x0000 ) // simple check for CMAP corruption
106 return false;
108 int nSubTables = Getsal_uInt16( pCmap + 2 );
109 if( (nSubTables <= 0) || (nLength < (24 + 8*nSubTables)) )
110 return false;
112 const unsigned char* pEndValidArea = pCmap + nLength;
114 // find the most interesting subtable in the CMAP
115 rtl_TextEncoding eRecodeFrom = RTL_TEXTENCODING_UNICODE;
116 int nOffset = 0;
117 int nFormat = -1;
118 int nBestVal = 0;
119 for( const unsigned char* p = pCmap + 4; --nSubTables >= 0; p += 8 )
121 int nPlatform = Getsal_uInt16( p );
122 int nEncoding = Getsal_uInt16( p+2 );
123 int nPlatformEncoding = (nPlatform << 8) + nEncoding;
125 int nValue;
126 rtl_TextEncoding eTmpEncoding = RTL_TEXTENCODING_UNICODE;
127 switch( nPlatformEncoding )
129 case 0x000: nValue = 20; break; // Unicode 1.0
130 case 0x001: nValue = 21; break; // Unicode 1.1
131 case 0x002: nValue = 22; break; // iso10646_1993
132 case 0x003: nValue = 23; break; // UCS-2
133 case 0x004: nValue = 24; break; // UCS-4
134 case 0x100: nValue = 22; break; // Mac Unicode<2.0
135 case 0x103: nValue = 23; break; // Mac Unicode>2.0
136 case 0x300: nValue = 5; rResult.mbSymbolic = true; break; // Win Symbol
137 case 0x301: nValue = 28; break; // Win UCS-2
138 case 0x30A: nValue = 29; break; // Win-UCS-4
139 case 0x302: nValue = 11; eTmpEncoding = RTL_TEXTENCODING_SHIFT_JIS; break;
140 case 0x303: nValue = 12; eTmpEncoding = RTL_TEXTENCODING_GB_18030; break;
141 case 0x304: nValue = 11; eTmpEncoding = RTL_TEXTENCODING_BIG5; break;
142 case 0x305: nValue = 11; eTmpEncoding = RTL_TEXTENCODING_MS_949; break;
143 case 0x306: nValue = 11; eTmpEncoding = RTL_TEXTENCODING_MS_1361; break;
144 default: nValue = 0; break;
147 if( nValue <= 0 ) // ignore unknown encodings
148 continue;
150 int nTmpOffset = GetUInt( p+4 );
151 int nTmpFormat = Getsal_uInt16( pCmap + nTmpOffset );
152 if( nTmpFormat == 12 ) // 32bit code -> glyph map format
153 nValue += 3;
154 else if( nTmpFormat != 4 ) // 16bit code -> glyph map format
155 continue; // ignore other formats
157 if( nBestVal < nValue )
159 nBestVal = nValue;
160 nOffset = nTmpOffset;
161 nFormat = nTmpFormat;
162 eRecodeFrom = eTmpEncoding;
166 // parse the best CMAP subtable
167 int nRangeCount = 0;
168 sal_UCS4* pCodePairs = nullptr;
169 int* pStartGlyphs = nullptr;
171 std::vector<sal_uInt16> aGlyphIdArray;
172 aGlyphIdArray.reserve( 0x1000 );
173 aGlyphIdArray.push_back( 0 );
175 // format 4, the most common 16bit char mapping table
176 if( (nFormat == 4) && ((nOffset+16) < nLength) )
178 int nSegCountX2 = Getsal_uInt16( pCmap + nOffset + 6 );
179 nRangeCount = nSegCountX2/2 - 1;
180 pCodePairs = new sal_UCS4[ nRangeCount * 2 ];
181 pStartGlyphs = new int[ nRangeCount ];
182 const unsigned char* pLimitBase = pCmap + nOffset + 14;
183 const unsigned char* pBeginBase = pLimitBase + nSegCountX2 + 2;
184 const unsigned char* pDeltaBase = pBeginBase + nSegCountX2;
185 const unsigned char* pOffsetBase = pDeltaBase + nSegCountX2;
186 sal_UCS4* pCP = pCodePairs;
187 for( int i = 0; i < nRangeCount; ++i )
189 const sal_UCS4 cMinChar = Getsal_uInt16( pBeginBase + 2*i );
190 const sal_UCS4 cMaxChar = Getsal_uInt16( pLimitBase + 2*i );
191 const int nGlyphDelta = GetSShort( pDeltaBase + 2*i );
192 const int nRangeOffset = Getsal_uInt16( pOffsetBase + 2*i );
193 if( cMinChar > cMaxChar ) { // no sane font should trigger this
194 SAL_WARN("vcl.gdi", "Min char should never be more than the max char!");
195 break;
197 if( cMaxChar == 0xFFFF ) {
198 SAL_WARN("vcl.gdi", "Format 4 char should not be 0xFFFF");
199 break;
201 if( !nRangeOffset ) {
202 // glyphid can be calculated directly
203 pStartGlyphs[i] = (cMinChar + nGlyphDelta) & 0xFFFF;
204 } else {
205 // update the glyphid-array with the glyphs in this range
206 pStartGlyphs[i] = -(int)aGlyphIdArray.size();
207 const unsigned char* pGlyphIdPtr = pOffsetBase + 2*i + nRangeOffset;
208 const size_t nRemainingSize = pEndValidArea - pGlyphIdPtr;
209 const size_t nMaxPossibleRecords = nRemainingSize/2;
210 if (nMaxPossibleRecords == 0) { // no sane font should trigger this
211 SAL_WARN("vcl.gdi", "More indexes claimed that space available in font!");
212 break;
214 const size_t nMaxLegalChar = cMinChar + nMaxPossibleRecords-1;
215 if (cMaxChar > nMaxLegalChar) { // no sane font should trigger this
216 SAL_WARN("vcl.gdi", "More indexes claimed that space available in font!");
217 break;
219 for( sal_UCS4 c = cMinChar; c <= cMaxChar; ++c, pGlyphIdPtr+=2 ) {
220 const int nGlyphIndex = Getsal_uInt16( pGlyphIdPtr ) + nGlyphDelta;
221 aGlyphIdArray.push_back( static_cast<sal_uInt16>(nGlyphIndex) );
224 *(pCP++) = cMinChar;
225 *(pCP++) = cMaxChar + 1;
227 nRangeCount = (pCP - pCodePairs) / 2;
229 // format 12, the most common 32bit char mapping table
230 else if( (nFormat == 12) && ((nOffset+16) < nLength) )
232 nRangeCount = GetUInt( pCmap + nOffset + 12 );
233 pCodePairs = new sal_UCS4[ nRangeCount * 2 ];
234 pStartGlyphs = new int[ nRangeCount ];
235 const unsigned char* pGroup = pCmap + nOffset + 16;
236 sal_UCS4* pCP = pCodePairs;
237 for( int i = 0; i < nRangeCount; ++i )
239 sal_UCS4 cMinChar = GetUInt( pGroup + 0 );
240 sal_UCS4 cMaxChar = GetUInt( pGroup + 4 );
241 int nGlyphId = GetUInt( pGroup + 8 );
242 pGroup += 12;
244 if( cMinChar > cMaxChar ) { // no sane font should trigger this
245 SAL_WARN("vcl.gdi", "Min char should never be more than the max char!");
246 break;
249 *(pCP++) = cMinChar;
250 *(pCP++) = cMaxChar + 1;
251 pStartGlyphs[i] = nGlyphId;
253 nRangeCount = (pCP - pCodePairs) / 2;
256 // check if any subtable resulted in something usable
257 if( nRangeCount <= 0 )
259 delete[] pCodePairs;
260 delete[] pStartGlyphs;
262 // even when no CMAP is available we know it for symbol fonts
263 if( rResult.mbSymbolic )
265 pCodePairs = new sal_UCS4[4];
266 pCodePairs[0] = 0x0020; // aliased symbols
267 pCodePairs[1] = 0x0100;
268 pCodePairs[2] = 0xF020; // original symbols
269 pCodePairs[3] = 0xF100;
270 rResult.mpRangeCodes = pCodePairs;
271 rResult.mnRangeCount = 2;
272 return true;
275 return false;
278 // recode the code ranges to their unicode encoded ranges if needed
279 rtl_TextToUnicodeConverter aConverter = nullptr;
280 rtl_UnicodeToTextContext aCvtContext = nullptr;
282 rResult.mbRecoded = ( eRecodeFrom != RTL_TEXTENCODING_UNICODE );
283 if( rResult.mbRecoded )
285 aConverter = rtl_createTextToUnicodeConverter( eRecodeFrom );
286 aCvtContext = rtl_createTextToUnicodeContext( aConverter );
289 if( aConverter && aCvtContext )
291 // determine the set of supported unicodes from encoded ranges
292 std::set<sal_UCS4> aSupportedUnicodes;
294 static const int NINSIZE = 64;
295 static const int NOUTSIZE = 64;
296 sal_Char cCharsInp[ NINSIZE ];
297 sal_Unicode cCharsOut[ NOUTSIZE ];
298 sal_UCS4* pCP = pCodePairs;
299 for( int i = 0; i < nRangeCount; ++i )
301 sal_UCS4 cMin = *(pCP++);
302 sal_UCS4 cEnd = *(pCP++);
303 while( cMin < cEnd )
305 int j = 0;
306 for(; (cMin < cEnd) && (j < NINSIZE); ++cMin )
308 if( cMin >= 0x0100 )
309 cCharsInp[ j++ ] = static_cast<sal_Char>(cMin >> 8);
310 if( (cMin >= 0x0100) || (cMin < 0x00A0) )
311 cCharsInp[ j++ ] = static_cast<sal_Char>(cMin);
314 sal_uInt32 nCvtInfo;
315 sal_Size nSrcCvtBytes;
316 int nOutLen = rtl_convertTextToUnicode(
317 aConverter, aCvtContext,
318 cCharsInp, j, cCharsOut, NOUTSIZE,
319 RTL_TEXTTOUNICODE_FLAGS_INVALID_IGNORE
320 | RTL_TEXTTOUNICODE_FLAGS_UNDEFINED_IGNORE,
321 &nCvtInfo, &nSrcCvtBytes );
323 for( j = 0; j < nOutLen; ++j )
324 aSupportedUnicodes.insert( cCharsOut[j] );
328 rtl_destroyTextToUnicodeConverter( aCvtContext );
329 rtl_destroyTextToUnicodeConverter( aConverter );
331 // convert the set of supported unicodes to ranges
332 std::vector<sal_UCS4> aSupportedRanges;
334 std::set<sal_UCS4>::const_iterator itChar = aSupportedUnicodes.begin();
335 for(; itChar != aSupportedUnicodes.end(); ++itChar )
337 if( aSupportedRanges.empty()
338 || (aSupportedRanges.back() != *itChar) )
340 // add new range beginning with current unicode
341 aSupportedRanges.push_back( *itChar );
342 aSupportedRanges.push_back( 0 );
345 // extend existing range to include current unicode
346 aSupportedRanges.back() = *itChar + 1;
349 // glyph mapping for non-unicode fonts not implemented
350 delete[] pStartGlyphs;
351 pStartGlyphs = nullptr;
352 aGlyphIdArray.clear();
354 // make a pCodePairs array using the vector from above
355 delete[] pCodePairs;
356 nRangeCount = aSupportedRanges.size() / 2;
357 if( nRangeCount <= 0 )
358 return false;
359 pCodePairs = new sal_UCS4[ nRangeCount * 2 ];
360 std::vector<sal_UCS4>::const_iterator itInt = aSupportedRanges.begin();
361 for( pCP = pCodePairs; itInt != aSupportedRanges.end(); ++itInt )
362 *(pCP++) = *itInt;
365 // prepare the glyphid-array if needed
366 // TODO: merge ranges if they are close enough?
367 sal_uInt16* pGlyphIds = nullptr;
368 if( !aGlyphIdArray.empty())
370 pGlyphIds = new sal_uInt16[ aGlyphIdArray.size() ];
371 sal_uInt16* pOut = pGlyphIds;
372 std::vector<sal_uInt16>::const_iterator it = aGlyphIdArray.begin();
373 while( it != aGlyphIdArray.end() )
374 *(pOut++) = *(it++);
377 // update the result struct
378 rResult.mpRangeCodes = pCodePairs;
379 rResult.mpStartGlyphs = pStartGlyphs;
380 rResult.mnRangeCount = nRangeCount;
381 rResult.mpGlyphIds = pGlyphIds;
382 return true;
385 FontCharMap::FontCharMap()
386 : mpImplFontCharMap( ImplFontCharMap::getDefaultMap() )
390 FontCharMap::FontCharMap( ImplFontCharMapRef const & pIFCMap )
391 : mpImplFontCharMap( pIFCMap )
395 FontCharMap::FontCharMap( const CmapResult& rCR )
396 : mpImplFontCharMap(new ImplFontCharMap(rCR))
400 FontCharMap::~FontCharMap()
402 mpImplFontCharMap = nullptr;
405 FontCharMapRef FontCharMap::GetDefaultMap( bool bSymbol )
407 FontCharMapRef xFontCharMap( new FontCharMap( ImplFontCharMap::getDefaultMap( bSymbol ) ) );
408 return xFontCharMap;
411 bool FontCharMap::IsDefaultMap() const
413 return mpImplFontCharMap->isDefaultMap();
416 int FontCharMap::GetCharCount() const
418 return mpImplFontCharMap->mnCharCount;
421 int FontCharMap::CountCharsInRange( sal_UCS4 cMin, sal_UCS4 cMax ) const
423 int nCount = 0;
425 // find and adjust range and char count for cMin
426 int nRangeMin = findRangeIndex( cMin );
427 if( nRangeMin & 1 )
428 ++nRangeMin;
429 else if( cMin > mpImplFontCharMap->mpRangeCodes[ nRangeMin ] )
430 nCount -= cMin - mpImplFontCharMap->mpRangeCodes[ nRangeMin ];
432 // find and adjust range and char count for cMax
433 int nRangeMax = findRangeIndex( cMax );
434 if( nRangeMax & 1 )
435 --nRangeMax;
436 else
437 nCount -= mpImplFontCharMap->mpRangeCodes[ nRangeMax+1 ] - cMax - 1;
439 // count chars in complete ranges between cMin and cMax
440 for( int i = nRangeMin; i <= nRangeMax; i+=2 )
441 nCount += mpImplFontCharMap->mpRangeCodes[i+1] - mpImplFontCharMap->mpRangeCodes[i];
443 return nCount;
446 bool FontCharMap::HasChar( sal_UCS4 cChar ) const
448 bool bHasChar = false;
450 if( mpImplFontCharMap->mpStartGlyphs == nullptr ) { // only the char-ranges are known
451 const int nRange = findRangeIndex( cChar );
452 if( nRange==0 && cChar < mpImplFontCharMap->mpRangeCodes[0] )
453 return false;
454 bHasChar = ((nRange & 1) == 0); // inside a range
455 } else { // glyph mapping is available
456 const int nGlyphIndex = GetGlyphIndex( cChar );
457 bHasChar = (nGlyphIndex != 0); // not the notdef-glyph
460 return bHasChar;
463 sal_UCS4 FontCharMap::GetFirstChar() const
465 return mpImplFontCharMap->mpRangeCodes[0];
468 sal_UCS4 FontCharMap::GetLastChar() const
470 return (mpImplFontCharMap->mpRangeCodes[ 2*mpImplFontCharMap->mnRangeCount-1 ] - 1);
473 sal_UCS4 FontCharMap::GetNextChar( sal_UCS4 cChar ) const
475 if( cChar < GetFirstChar() )
476 return GetFirstChar();
477 if( cChar >= GetLastChar() )
478 return GetLastChar();
480 int nRange = findRangeIndex( cChar + 1 );
481 if( nRange & 1 ) // outside of range?
482 return mpImplFontCharMap->mpRangeCodes[ nRange + 1 ]; // => first in next range
483 return (cChar + 1);
486 sal_UCS4 FontCharMap::GetPrevChar( sal_UCS4 cChar ) const
488 if( cChar <= GetFirstChar() )
489 return GetFirstChar();
490 if( cChar > GetLastChar() )
491 return GetLastChar();
493 int nRange = findRangeIndex( cChar - 1 );
494 if( nRange & 1 ) // outside a range?
495 return (mpImplFontCharMap->mpRangeCodes[ nRange ] - 1); // => last in prev range
496 return (cChar - 1);
499 int FontCharMap::GetIndexFromChar( sal_UCS4 cChar ) const
501 // TODO: improve linear walk?
502 int nCharIndex = 0;
503 const sal_UCS4* pRange = &mpImplFontCharMap->mpRangeCodes[0];
504 for( int i = 0; i < mpImplFontCharMap->mnRangeCount; ++i )
506 sal_UCS4 cFirst = *(pRange++);
507 sal_UCS4 cLast = *(pRange++);
508 if( cChar >= cLast )
509 nCharIndex += cLast - cFirst;
510 else if( cChar >= cFirst )
511 return nCharIndex + (cChar - cFirst);
512 else
513 break;
516 return -1;
519 sal_UCS4 FontCharMap::GetCharFromIndex( int nIndex ) const
521 // TODO: improve linear walk?
522 const sal_UCS4* pRange = &mpImplFontCharMap->mpRangeCodes[0];
523 for( int i = 0; i < mpImplFontCharMap->mnRangeCount; ++i )
525 sal_UCS4 cFirst = *(pRange++);
526 sal_UCS4 cLast = *(pRange++);
527 nIndex -= cLast - cFirst;
528 if( nIndex < 0 )
529 return (cLast + nIndex);
532 // we can only get here with an out-of-bounds charindex
533 return mpImplFontCharMap->mpRangeCodes[0];
536 int FontCharMap::findRangeIndex( sal_UCS4 cChar ) const
538 int nLower = 0;
539 int nMid = mpImplFontCharMap->mnRangeCount;
540 int nUpper = 2 * mpImplFontCharMap->mnRangeCount - 1;
541 while( nLower < nUpper )
543 if( cChar >= mpImplFontCharMap->mpRangeCodes[ nMid ] )
544 nLower = nMid;
545 else
546 nUpper = nMid - 1;
547 nMid = (nLower + nUpper + 1) / 2;
550 return nMid;
553 int FontCharMap::GetGlyphIndex( sal_UCS4 cChar ) const
555 // return -1 if the object doesn't know the glyph ids
556 if( !mpImplFontCharMap->mpStartGlyphs )
557 return -1;
559 // return 0 if the unicode doesn't have a matching glyph
560 int nRange = findRangeIndex( cChar );
561 // check that we are inside any range
562 if( (nRange == 0) && (cChar < mpImplFontCharMap->mpRangeCodes[0]) ) {
563 // symbol aliasing gives symbol fonts a second chance
564 const bool bSymbolic = cChar <= 0xFF && (mpImplFontCharMap->mpRangeCodes[0]>=0xF000) &&
565 (mpImplFontCharMap->mpRangeCodes[1]<=0xF0FF);
566 if( !bSymbolic )
567 return 0;
568 // check for symbol aliasing (U+F0xx -> U+00xx)
569 cChar |= 0xF000;
570 nRange = findRangeIndex( cChar );
571 if( (nRange == 0) && (cChar < mpImplFontCharMap->mpRangeCodes[0]) ) {
572 return 0;
575 // check that we are inside a range
576 if( (nRange & 1) != 0 )
577 return 0;
579 // get glyph index directly or indirectly
580 int nGlyphIndex = cChar - mpImplFontCharMap->mpRangeCodes[ nRange ];
581 const int nStartIndex = mpImplFontCharMap->mpStartGlyphs[ nRange/2 ];
582 if( nStartIndex >= 0 ) {
583 // the glyph index can be calculated
584 nGlyphIndex += nStartIndex;
585 } else {
586 // the glyphid array has the glyph index
587 nGlyphIndex = mpImplFontCharMap->mpGlyphIds[ nGlyphIndex - nStartIndex];
590 return nGlyphIndex;
593 /* vim:set shiftwidth=4 softtabstop=4 expandtab: */