/*
 * This file is part of the LibreOffice project.
 *
 * This Source Code Form is subject to the terms of the Mozilla Public
 * License, v. 2.0. If a copy of the MPL was not distributed with this
 * file, You can obtain one at http://mozilla.org/MPL/2.0/.
 *
 * This file incorporates work covered by the following license notice:
 *
 *   Licensed to the Apache Software Foundation (ASF) under one or more
 *   contributor license agreements. See the NOTICE file distributed
 *   with this work for additional information regarding copyright
 *   ownership. The ASF licenses this file to you under the Apache
 *   License, Version 2.0 (the "License"); you may not use this file
 *   except in compliance with the License. You may obtain a copy of
 *   the License at http://www.apache.org/licenses/LICENSE-2.0 .
 */
#include <vcl/fontcharmap.hxx>
#include <impfontcharmap.hxx>
#include <rtl/textcvt.h>
#include <rtl/textenc.h>
#include <sal/log.hxx>

#include <set>
#include <vector>
27 CmapResult::CmapResult( bool bSymbolic
,
28 const sal_UCS4
* pRangeCodes
, int nRangeCount
)
29 : mpRangeCodes( pRangeCodes
)
30 , mpStartGlyphs( nullptr)
31 , mpGlyphIds( nullptr)
32 , mnRangeCount( nRangeCount
)
33 , mbSymbolic( bSymbolic
)
37 static ImplFontCharMapRef g_pDefaultImplFontCharMap
;
38 static const sal_UCS4 aDefaultUnicodeRanges
[] = {0x0020,0xD800, 0xE000,0xFFF0};
39 static const sal_UCS4 aDefaultSymbolRanges
[] = {0x0020,0x0100, 0xF020,0xF100};
41 ImplFontCharMap::~ImplFontCharMap()
45 delete[] mpRangeCodes
;
46 delete[] mpStartGlyphs
;
51 ImplFontCharMap::ImplFontCharMap( const CmapResult
& rCR
)
52 : mpRangeCodes( rCR
.mpRangeCodes
)
53 , mpStartGlyphs( rCR
.mpStartGlyphs
)
54 , mpGlyphIds( rCR
.mpGlyphIds
)
55 , mnRangeCount( rCR
.mnRangeCount
)
58 const sal_UCS4
* pRangePtr
= mpRangeCodes
;
59 for( int i
= mnRangeCount
; --i
>= 0; pRangePtr
+= 2 )
61 sal_UCS4 cFirst
= pRangePtr
[0];
62 sal_UCS4 cLast
= pRangePtr
[1];
63 mnCharCount
+= cLast
- cFirst
;
67 ImplFontCharMapRef
const & ImplFontCharMap::getDefaultMap( bool bSymbols
)
69 const sal_UCS4
* pRangeCodes
= aDefaultUnicodeRanges
;
70 int nCodesCount
= SAL_N_ELEMENTS(aDefaultUnicodeRanges
);
73 pRangeCodes
= aDefaultSymbolRanges
;
74 nCodesCount
= SAL_N_ELEMENTS(aDefaultSymbolRanges
);
77 CmapResult
aDefaultCR( bSymbols
, pRangeCodes
, nCodesCount
/2 );
78 g_pDefaultImplFontCharMap
= ImplFontCharMapRef(new ImplFontCharMap(aDefaultCR
));
80 return g_pDefaultImplFontCharMap
;
83 bool ImplFontCharMap::isDefaultMap() const
85 const bool bIsDefault
= (mpRangeCodes
== aDefaultUnicodeRanges
) || (mpRangeCodes
== aDefaultSymbolRanges
);
/** Reads a big-endian 32 bit value from the four bytes starting at p.

    The bytes are widened to unsigned before shifting so that a first byte
    >= 0x80 is never shifted into the sign bit of an int.
*/
static unsigned GetUInt( const unsigned char* p )
{
    return (static_cast<unsigned>(p[0]) << 24)
         | (static_cast<unsigned>(p[1]) << 16)
         | (static_cast<unsigned>(p[2]) << 8)
         |  static_cast<unsigned>(p[3]);
}

/** Reads a big-endian 16 bit value from the two bytes starting at p. */
static unsigned GetUShort( const unsigned char* p )
{
    return (static_cast<unsigned>(p[0]) << 8) | static_cast<unsigned>(p[1]);
}
91 static int GetSShort( const unsigned char* p
){ return static_cast<sal_Int16
>((p
[0]<<8)|p
[1]);}
93 // TODO: move CMAP parsing directly into the ImplFontCharMap class
94 bool ParseCMAP( const unsigned char* pCmap
, int nLength
, CmapResult
& rResult
)
96 rResult
.mpRangeCodes
= nullptr;
97 rResult
.mpStartGlyphs
= nullptr;
98 rResult
.mpGlyphIds
= nullptr;
99 rResult
.mnRangeCount
= 0;
100 rResult
.mbRecoded
= false;
101 rResult
.mbSymbolic
= false;
103 // parse the table header and check for validity
104 if( !pCmap
|| (nLength
< 24) )
107 if( GetUShort( pCmap
) != 0x0000 ) // simple check for CMAP corruption
110 int nSubTables
= GetUShort( pCmap
+ 2 );
111 if( (nSubTables
<= 0) || (nLength
< (24 + 8*nSubTables
)) )
114 const unsigned char* pEndValidArea
= pCmap
+ nLength
;
116 // find the most interesting subtable in the CMAP
117 rtl_TextEncoding eRecodeFrom
= RTL_TEXTENCODING_UNICODE
;
121 for( const unsigned char* p
= pCmap
+ 4; --nSubTables
>= 0; p
+= 8 )
123 int nPlatform
= GetUShort( p
);
124 int nEncoding
= GetUShort( p
+2 );
125 int nPlatformEncoding
= (nPlatform
<< 8) + nEncoding
;
128 rtl_TextEncoding eTmpEncoding
= RTL_TEXTENCODING_UNICODE
;
129 switch( nPlatformEncoding
)
131 case 0x000: nValue
= 20; break; // Unicode 1.0
132 case 0x001: nValue
= 21; break; // Unicode 1.1
133 case 0x002: nValue
= 22; break; // iso10646_1993
134 case 0x003: nValue
= 23; break; // UCS-2
135 case 0x004: nValue
= 24; break; // UCS-4
136 case 0x100: nValue
= 22; break; // Mac Unicode<2.0
137 case 0x103: nValue
= 23; break; // Mac Unicode>2.0
138 case 0x300: nValue
= 5; rResult
.mbSymbolic
= true; break; // Win Symbol
139 case 0x301: nValue
= 28; break; // Win UCS-2
140 case 0x30A: nValue
= 29; break; // Win-UCS-4
141 case 0x302: nValue
= 11; eTmpEncoding
= RTL_TEXTENCODING_SHIFT_JIS
; break;
142 case 0x303: nValue
= 12; eTmpEncoding
= RTL_TEXTENCODING_GB_18030
; break;
143 case 0x304: nValue
= 11; eTmpEncoding
= RTL_TEXTENCODING_BIG5
; break;
144 case 0x305: nValue
= 11; eTmpEncoding
= RTL_TEXTENCODING_MS_949
; break;
145 case 0x306: nValue
= 11; eTmpEncoding
= RTL_TEXTENCODING_MS_1361
; break;
146 default: nValue
= 0; break;
149 if( nValue
<= 0 ) // ignore unknown encodings
152 int nTmpOffset
= GetUInt( p
+4 );
153 int nTmpFormat
= GetUShort( pCmap
+ nTmpOffset
);
154 if( nTmpFormat
== 12 ) // 32bit code -> glyph map format
156 else if( nTmpFormat
!= 4 ) // 16bit code -> glyph map format
157 continue; // ignore other formats
159 if( nBestVal
< nValue
)
162 nOffset
= nTmpOffset
;
163 nFormat
= nTmpFormat
;
164 eRecodeFrom
= eTmpEncoding
;
168 // parse the best CMAP subtable
170 sal_UCS4
* pCodePairs
= nullptr;
171 int* pStartGlyphs
= nullptr;
173 std::vector
<sal_uInt16
> aGlyphIdArray
;
174 aGlyphIdArray
.reserve( 0x1000 );
175 aGlyphIdArray
.push_back( 0 );
177 // format 4, the most common 16bit char mapping table
178 if( (nFormat
== 4) && ((nOffset
+16) < nLength
) )
180 int nSegCountX2
= GetUShort( pCmap
+ nOffset
+ 6 );
181 nRangeCount
= nSegCountX2
/2 - 1;
182 pCodePairs
= new sal_UCS4
[ nRangeCount
* 2 ];
183 pStartGlyphs
= new int[ nRangeCount
];
184 const unsigned char* pLimitBase
= pCmap
+ nOffset
+ 14;
185 const unsigned char* pBeginBase
= pLimitBase
+ nSegCountX2
+ 2;
186 const unsigned char* pDeltaBase
= pBeginBase
+ nSegCountX2
;
187 const unsigned char* pOffsetBase
= pDeltaBase
+ nSegCountX2
;
188 sal_UCS4
* pCP
= pCodePairs
;
189 for( int i
= 0; i
< nRangeCount
; ++i
)
191 const sal_UCS4 cMinChar
= GetUShort( pBeginBase
+ 2*i
);
192 const sal_UCS4 cMaxChar
= GetUShort( pLimitBase
+ 2*i
);
193 const int nGlyphDelta
= GetSShort( pDeltaBase
+ 2*i
);
194 const int nRangeOffset
= GetUShort( pOffsetBase
+ 2*i
);
195 if( cMinChar
> cMaxChar
) { // no sane font should trigger this
196 SAL_WARN("vcl.gdi", "Min char should never be more than the max char!");
199 if( cMaxChar
== 0xFFFF ) {
200 SAL_WARN("vcl.gdi", "Format 4 char should not be 0xFFFF");
203 if( !nRangeOffset
) {
204 // glyphid can be calculated directly
205 pStartGlyphs
[i
] = (cMinChar
+ nGlyphDelta
) & 0xFFFF;
207 // update the glyphid-array with the glyphs in this range
208 pStartGlyphs
[i
] = -static_cast<int>(aGlyphIdArray
.size());
209 const unsigned char* pGlyphIdPtr
= pOffsetBase
+ 2*i
+ nRangeOffset
;
210 const size_t nRemainingSize
= pEndValidArea
- pGlyphIdPtr
;
211 const size_t nMaxPossibleRecords
= nRemainingSize
/2;
212 if (nMaxPossibleRecords
== 0) { // no sane font should trigger this
213 SAL_WARN("vcl.gdi", "More indexes claimed that space available in font!");
216 const size_t nMaxLegalChar
= cMinChar
+ nMaxPossibleRecords
-1;
217 if (cMaxChar
> nMaxLegalChar
) { // no sane font should trigger this
218 SAL_WARN("vcl.gdi", "More indexes claimed that space available in font!");
221 for( sal_UCS4 c
= cMinChar
; c
<= cMaxChar
; ++c
, pGlyphIdPtr
+=2 ) {
222 const int nGlyphIndex
= GetUShort( pGlyphIdPtr
) + nGlyphDelta
;
223 aGlyphIdArray
.push_back( static_cast<sal_uInt16
>(nGlyphIndex
) );
227 *(pCP
++) = cMaxChar
+ 1;
229 nRangeCount
= (pCP
- pCodePairs
) / 2;
231 // format 12, the most common 32bit char mapping table
232 else if( (nFormat
== 12) && ((nOffset
+16) < nLength
) )
234 nRangeCount
= GetUInt( pCmap
+ nOffset
+ 12 );
237 SAL_WARN("vcl.gdi", "negative RangeCount");
241 const int nGroupOffset
= nOffset
+ 16;
242 const int nRemainingLen
= nLength
- nGroupOffset
;
243 const int nMaxPossiblePairs
= nRemainingLen
/ 12;
244 if (nRangeCount
> nMaxPossiblePairs
)
246 SAL_WARN("vcl.gdi", "more code pairs requested then space available");
247 nRangeCount
= nMaxPossiblePairs
;
250 pCodePairs
= new sal_UCS4
[ nRangeCount
* 2 ];
251 pStartGlyphs
= new int[ nRangeCount
];
253 const unsigned char* pGroup
= pCmap
+ nGroupOffset
;
254 sal_UCS4
* pCP
= pCodePairs
;
255 for( int i
= 0; i
< nRangeCount
; ++i
)
257 sal_UCS4 cMinChar
= GetUInt( pGroup
+ 0 );
258 sal_UCS4 cMaxChar
= GetUInt( pGroup
+ 4 );
259 int nGlyphId
= GetUInt( pGroup
+ 8 );
262 if( cMinChar
> cMaxChar
) { // no sane font should trigger this
263 SAL_WARN("vcl.gdi", "Min char should never be more than the max char!");
268 *(pCP
++) = cMaxChar
+ 1;
269 pStartGlyphs
[i
] = nGlyphId
;
271 nRangeCount
= (pCP
- pCodePairs
) / 2;
274 // check if any subtable resulted in something usable
275 if( nRangeCount
<= 0 )
278 delete[] pStartGlyphs
;
280 // even when no CMAP is available we know it for symbol fonts
281 if( rResult
.mbSymbolic
)
283 pCodePairs
= new sal_UCS4
[4];
284 pCodePairs
[0] = 0x0020; // aliased symbols
285 pCodePairs
[1] = 0x0100;
286 pCodePairs
[2] = 0xF020; // original symbols
287 pCodePairs
[3] = 0xF100;
288 rResult
.mpRangeCodes
= pCodePairs
;
289 rResult
.mnRangeCount
= 2;
296 // recode the code ranges to their unicode encoded ranges if needed
297 rtl_TextToUnicodeConverter aConverter
= nullptr;
298 rtl_UnicodeToTextContext aCvtContext
= nullptr;
300 rResult
.mbRecoded
= ( eRecodeFrom
!= RTL_TEXTENCODING_UNICODE
);
301 if( rResult
.mbRecoded
)
303 aConverter
= rtl_createTextToUnicodeConverter( eRecodeFrom
);
304 aCvtContext
= rtl_createTextToUnicodeContext( aConverter
);
307 if( aConverter
&& aCvtContext
)
309 // determine the set of supported code points from encoded ranges
310 std::set
<sal_UCS4
> aSupportedCodePoints
;
312 static const int NINSIZE
= 64;
313 static const int NOUTSIZE
= 64;
314 sal_Char cCharsInp
[ NINSIZE
];
315 sal_Unicode cCharsOut
[ NOUTSIZE
];
316 sal_UCS4
* pCP
= pCodePairs
;
317 for( int i
= 0; i
< nRangeCount
; ++i
)
319 sal_UCS4 cMin
= *(pCP
++);
320 sal_UCS4 cEnd
= *(pCP
++);
324 for(; (cMin
< cEnd
) && (j
< NINSIZE
); ++cMin
)
327 cCharsInp
[ j
++ ] = static_cast<sal_Char
>(cMin
>> 8);
328 if( (cMin
>= 0x0100) || (cMin
< 0x00A0) )
329 cCharsInp
[ j
++ ] = static_cast<sal_Char
>(cMin
);
333 sal_Size nSrcCvtBytes
;
334 int nOutLen
= rtl_convertTextToUnicode(
335 aConverter
, aCvtContext
,
336 cCharsInp
, j
, cCharsOut
, NOUTSIZE
,
337 RTL_TEXTTOUNICODE_FLAGS_INVALID_IGNORE
338 | RTL_TEXTTOUNICODE_FLAGS_UNDEFINED_IGNORE
,
339 &nCvtInfo
, &nSrcCvtBytes
);
341 for( j
= 0; j
< nOutLen
; ++j
)
342 aSupportedCodePoints
.insert( cCharsOut
[j
] );
346 rtl_destroyTextToUnicodeConverter( aCvtContext
);
347 rtl_destroyTextToUnicodeConverter( aConverter
);
349 // convert the set of supported code points to ranges
350 std::vector
<sal_UCS4
> aSupportedRanges
;
352 for (auto const& supportedPoint
: aSupportedCodePoints
)
354 if( aSupportedRanges
.empty()
355 || (aSupportedRanges
.back() != supportedPoint
) )
357 // add new range beginning with current unicode
358 aSupportedRanges
.push_back(supportedPoint
);
359 aSupportedRanges
.push_back( 0 );
362 // extend existing range to include current unicode
363 aSupportedRanges
.back() = supportedPoint
+ 1;
366 // glyph mapping for non-unicode fonts not implemented
367 delete[] pStartGlyphs
;
368 pStartGlyphs
= nullptr;
369 aGlyphIdArray
.clear();
371 // make a pCodePairs array using the vector from above
373 nRangeCount
= aSupportedRanges
.size() / 2;
374 if( nRangeCount
<= 0 )
376 pCodePairs
= new sal_UCS4
[ nRangeCount
* 2 ];
378 for (auto const& supportedRange
: aSupportedRanges
)
379 *(pCP
++) = supportedRange
;
382 // prepare the glyphid-array if needed
383 // TODO: merge ranges if they are close enough?
384 sal_uInt16
* pGlyphIds
= nullptr;
385 if( !aGlyphIdArray
.empty())
387 pGlyphIds
= new sal_uInt16
[ aGlyphIdArray
.size() ];
388 sal_uInt16
* pOut
= pGlyphIds
;
389 for (auto const& glyphId
: aGlyphIdArray
)
393 // update the result struct
394 rResult
.mpRangeCodes
= pCodePairs
;
395 rResult
.mpStartGlyphs
= pStartGlyphs
;
396 rResult
.mnRangeCount
= nRangeCount
;
397 rResult
.mpGlyphIds
= pGlyphIds
;
401 FontCharMap::FontCharMap()
402 : mpImplFontCharMap( ImplFontCharMap::getDefaultMap() )
406 FontCharMap::FontCharMap( ImplFontCharMapRef
const & pIFCMap
)
407 : mpImplFontCharMap( pIFCMap
)
411 FontCharMap::FontCharMap( const CmapResult
& rCR
)
412 : mpImplFontCharMap(new ImplFontCharMap(rCR
))
416 FontCharMap::~FontCharMap()
418 mpImplFontCharMap
= nullptr;
421 FontCharMapRef
FontCharMap::GetDefaultMap( bool bSymbol
)
423 FontCharMapRef
xFontCharMap( new FontCharMap( ImplFontCharMap::getDefaultMap( bSymbol
) ) );
427 bool FontCharMap::IsDefaultMap() const
429 return mpImplFontCharMap
->isDefaultMap();
432 int FontCharMap::GetCharCount() const
434 return mpImplFontCharMap
->mnCharCount
;
437 int FontCharMap::CountCharsInRange( sal_UCS4 cMin
, sal_UCS4 cMax
) const
441 // find and adjust range and char count for cMin
442 int nRangeMin
= findRangeIndex( cMin
);
445 else if( cMin
> mpImplFontCharMap
->mpRangeCodes
[ nRangeMin
] )
446 nCount
-= cMin
- mpImplFontCharMap
->mpRangeCodes
[ nRangeMin
];
448 // find and adjust range and char count for cMax
449 int nRangeMax
= findRangeIndex( cMax
);
453 nCount
-= mpImplFontCharMap
->mpRangeCodes
[ nRangeMax
+1 ] - cMax
- 1;
455 // count chars in complete ranges between cMin and cMax
456 for( int i
= nRangeMin
; i
<= nRangeMax
; i
+=2 )
457 nCount
+= mpImplFontCharMap
->mpRangeCodes
[i
+1] - mpImplFontCharMap
->mpRangeCodes
[i
];
462 bool FontCharMap::HasChar( sal_UCS4 cChar
) const
464 bool bHasChar
= false;
466 if( mpImplFontCharMap
->mpStartGlyphs
== nullptr ) { // only the char-ranges are known
467 const int nRange
= findRangeIndex( cChar
);
468 if( nRange
==0 && cChar
< mpImplFontCharMap
->mpRangeCodes
[0] )
470 bHasChar
= ((nRange
& 1) == 0); // inside a range
471 } else { // glyph mapping is available
472 const int nGlyphIndex
= GetGlyphIndex( cChar
);
473 bHasChar
= (nGlyphIndex
!= 0); // not the notdef-glyph
479 sal_UCS4
FontCharMap::GetFirstChar() const
481 return mpImplFontCharMap
->mpRangeCodes
[0];
484 sal_UCS4
FontCharMap::GetLastChar() const
486 return (mpImplFontCharMap
->mpRangeCodes
[ 2*mpImplFontCharMap
->mnRangeCount
-1 ] - 1);
489 sal_UCS4
FontCharMap::GetNextChar( sal_UCS4 cChar
) const
491 if( cChar
< GetFirstChar() )
492 return GetFirstChar();
493 if( cChar
>= GetLastChar() )
494 return GetLastChar();
496 int nRange
= findRangeIndex( cChar
+ 1 );
497 if( nRange
& 1 ) // outside of range?
498 return mpImplFontCharMap
->mpRangeCodes
[ nRange
+ 1 ]; // => first in next range
502 sal_UCS4
FontCharMap::GetPrevChar( sal_UCS4 cChar
) const
504 if( cChar
<= GetFirstChar() )
505 return GetFirstChar();
506 if( cChar
> GetLastChar() )
507 return GetLastChar();
509 int nRange
= findRangeIndex( cChar
- 1 );
510 if( nRange
& 1 ) // outside a range?
511 return (mpImplFontCharMap
->mpRangeCodes
[ nRange
] - 1); // => last in prev range
515 int FontCharMap::GetIndexFromChar( sal_UCS4 cChar
) const
517 // TODO: improve linear walk?
519 const sal_UCS4
* pRange
= &mpImplFontCharMap
->mpRangeCodes
[0];
520 for( int i
= 0; i
< mpImplFontCharMap
->mnRangeCount
; ++i
)
522 sal_UCS4 cFirst
= *(pRange
++);
523 sal_UCS4 cLast
= *(pRange
++);
525 nCharIndex
+= cLast
- cFirst
;
526 else if( cChar
>= cFirst
)
527 return nCharIndex
+ (cChar
- cFirst
);
535 sal_UCS4
FontCharMap::GetCharFromIndex( int nIndex
) const
537 // TODO: improve linear walk?
538 const sal_UCS4
* pRange
= &mpImplFontCharMap
->mpRangeCodes
[0];
539 for( int i
= 0; i
< mpImplFontCharMap
->mnRangeCount
; ++i
)
541 sal_UCS4 cFirst
= *(pRange
++);
542 sal_UCS4 cLast
= *(pRange
++);
543 nIndex
-= cLast
- cFirst
;
545 return (cLast
+ nIndex
);
548 // we can only get here with an out-of-bounds charindex
549 return mpImplFontCharMap
->mpRangeCodes
[0];
552 int FontCharMap::findRangeIndex( sal_UCS4 cChar
) const
555 int nMid
= mpImplFontCharMap
->mnRangeCount
;
556 int nUpper
= 2 * mpImplFontCharMap
->mnRangeCount
- 1;
557 while( nLower
< nUpper
)
559 if( cChar
>= mpImplFontCharMap
->mpRangeCodes
[ nMid
] )
563 nMid
= (nLower
+ nUpper
+ 1) / 2;
569 int FontCharMap::GetGlyphIndex( sal_UCS4 cChar
) const
571 // return -1 if the object doesn't know the glyph ids
572 if( !mpImplFontCharMap
->mpStartGlyphs
)
575 // return 0 if the unicode doesn't have a matching glyph
576 int nRange
= findRangeIndex( cChar
);
577 // check that we are inside any range
578 if( (nRange
== 0) && (cChar
< mpImplFontCharMap
->mpRangeCodes
[0]) ) {
579 // symbol aliasing gives symbol fonts a second chance
580 const bool bSymbolic
= cChar
<= 0xFF && (mpImplFontCharMap
->mpRangeCodes
[0]>=0xF000) &&
581 (mpImplFontCharMap
->mpRangeCodes
[1]<=0xF0FF);
584 // check for symbol aliasing (U+F0xx -> U+00xx)
586 nRange
= findRangeIndex( cChar
);
587 if( (nRange
== 0) && (cChar
< mpImplFontCharMap
->mpRangeCodes
[0]) ) {
591 // check that we are inside a range
592 if( (nRange
& 1) != 0 )
595 // get glyph index directly or indirectly
596 int nGlyphIndex
= cChar
- mpImplFontCharMap
->mpRangeCodes
[ nRange
];
597 const int nStartIndex
= mpImplFontCharMap
->mpStartGlyphs
[ nRange
/2 ];
598 if( nStartIndex
>= 0 ) {
599 // the glyph index can be calculated
600 nGlyphIndex
+= nStartIndex
;
602 // the glyphid array has the glyph index
603 nGlyphIndex
= mpImplFontCharMap
->mpGlyphIds
[ nGlyphIndex
- nStartIndex
];
/* vim:set shiftwidth=4 softtabstop=4 expandtab: */