2 * This file is part of the LibreOffice project.
4 * This Source Code Form is subject to the terms of the Mozilla Public
5 * License, v. 2.0. If a copy of the MPL was not distributed with this
6 * file, You can obtain one at http://mozilla.org/MPL/2.0/.
8 * This file incorporates work covered by the following license notice:
10 * Licensed to the Apache Software Foundation (ASF) under one or more
11 * contributor license agreements. See the NOTICE file distributed
12 * with this work for additional information regarding copyright
13 * ownership. The ASF licenses this file to you under the Apache
14 * License, Version 2.0 (the "License"); you may not use this file
15 * except in compliance with the License. You may obtain a copy of
16 * the License at http://www.apache.org/licenses/LICENSE-2.0 .
18 #include <vcl/fontcharmap.hxx>
19 #include <fontinstance.hxx>
20 #include <impfontcharmap.hxx>
// Constructs a CmapResult around a caller-supplied range-code array.
// bSymbolic is stored in mbSymbolic, pRangeCodes in mpRangeCodes and
// nRangeCount in mnRangeCount; mpStartGlyphs and mpGlyphIds start out as
// nullptr. The pRangeCodes pointer is stored as given, no copy is made here.
// NOTE(review): the embedded numbering jumps from 31 to 35, so the end of
// the initializer list and the constructor body are not visible in this
// listing -- confirm against the complete file (e.g. how mbRecoded is set).
25 CmapResult::CmapResult( bool bSymbolic
,
26 const sal_UCS4
* pRangeCodes
, int nRangeCount
)
27 : mpRangeCodes( pRangeCodes
)
28 , mpStartGlyphs( nullptr)
29 , mpGlyphIds( nullptr)
30 , mnRangeCount( nRangeCount
)
31 , mbSymbolic( bSymbolic
)
35 static ImplFontCharMapRef xDefaultImplFontCharMap
;
36 static const sal_UCS4 aDefaultUnicodeRanges
[] = {0x0020,0xD800, 0xE000,0xFFF0};
37 static const sal_UCS4 aDefaultSymbolRanges
[] = {0x0020,0x0100, 0xF020,0xF100};
// Destructor: releases the mpRangeCodes and mpStartGlyphs arrays with
// delete[].
// NOTE(review): the embedded numbering skips 40-42 and 45-48, so the
// statements surrounding these two deletes are not visible in this listing.
// getDefaultMap() stores pointers to the static default range tables in
// mpRangeCodes, and those must never reach delete[] -- presumably an elided
// guard covers that case; verify against the complete file. How mpGlyphIds
// is released is likewise not visible here.
39 ImplFontCharMap::~ImplFontCharMap()
43 delete[] mpRangeCodes
;
44 delete[] mpStartGlyphs
;
// Builds the implementation map from a parsed CmapResult. The array
// pointers (mpRangeCodes, mpStartGlyphs, mpGlyphIds) and mnRangeCount are
// copied from rCR as-is; no deep copy is made here.
// The loop then walks the mnRangeCount (first, end) pairs and adds
// end - first of every pair to mnCharCount.
// NOTE(review): the embedded numbering skips 54-55, so the remainder of the
// initializer list (presumably the initial value of mnCharCount) is not
// visible in this listing -- confirm against the complete file.
49 ImplFontCharMap::ImplFontCharMap( const CmapResult
& rCR
)
50 : mpRangeCodes( rCR
.mpRangeCodes
)
51 , mpStartGlyphs( rCR
.mpStartGlyphs
)
52 , mpGlyphIds( rCR
.mpGlyphIds
)
53 , mnRangeCount( rCR
.mnRangeCount
)
56 const sal_UCS4
* pRangePtr
= mpRangeCodes
;
// counts down from mnRangeCount while advancing two codes per range
57 for( int i
= mnRangeCount
; --i
>= 0; pRangePtr
+= 2 )
59 sal_UCS4 cFirst
= pRangePtr
[0];
60 sal_UCS4 cLast
= pRangePtr
[1];
// cLast is the end code of the pair, so the difference is the range size
61 mnCharCount
+= cLast
- cFirst
;
// Returns a reference to the file-level xDefaultImplFontCharMap after
// pointing it at a newly created ImplFontCharMap that wraps one of the
// static default range tables (no array is allocated for the ranges).
// The code count is derived with sizeof and halved to get the pair count.
// NOTE(review): the embedded numbering skips 69-70, so the condition that
// selects aDefaultSymbolRanges is not visible -- presumably it tests
// bSymbols; confirm against the complete file.
// NOTE(review): every call visible here reassigns the shared static, and the
// result is returned by const reference. A later call therefore changes what
// an earlier returned reference designates -- verify that callers copy the
// reference immediately.
65 ImplFontCharMapRef
const & ImplFontCharMap::getDefaultMap( bool bSymbols
)
67 const sal_UCS4
* pRangeCodes
= aDefaultUnicodeRanges
;
68 int nCodesCount
= sizeof(aDefaultUnicodeRanges
) / sizeof(*pRangeCodes
);
71 pRangeCodes
= aDefaultSymbolRanges
;
72 nCodesCount
= sizeof(aDefaultSymbolRanges
) / sizeof(*pRangeCodes
);
// CmapResult takes the number of ranges, i.e. half the number of codes
75 CmapResult
aDefaultCR( bSymbols
, pRangeCodes
, nCodesCount
/2 );
76 xDefaultImplFontCharMap
= ImplFontCharMapRef(new ImplFontCharMap(aDefaultCR
));
78 return xDefaultImplFontCharMap
;
81 bool ImplFontCharMap::isDefaultMap() const
83 const bool bIsDefault
= (mpRangeCodes
== aDefaultUnicodeRanges
) || (mpRangeCodes
== aDefaultSymbolRanges
);
// Reads a big-endian 32-bit unsigned value from the four bytes at p.
// The bytes are widened to unsigned before shifting: shifting the promoted
// (signed) int left by 24 would move a set top bit of p[0] into the sign
// bit, which is not portable arithmetic.
static unsigned GetUInt( const unsigned char* p )
{
    return (static_cast<unsigned>(p[0]) << 24)
         | (static_cast<unsigned>(p[1]) << 16)
         | (static_cast<unsigned>(p[2]) << 8)
         |  static_cast<unsigned>(p[3]);
}
// Reads a big-endian 16-bit unsigned value from the two bytes at p.
static unsigned Getsal_uInt16( const unsigned char* p )
{
    const int nHighByte = p[0];
    const int nLowByte = p[1];
    return (nHighByte << 8) | nLowByte;
}
89 static int GetSShort( const unsigned char* p
){ return static_cast<sal_Int16
>((p
[0]<<8)|p
[1]);}
91 // TODO: move CMAP parsing directly into the ImplFontCharMap class
// Parses a font CMAP table and fills rResult with the code ranges (and,
// where available, the glyph mapping) of the best subtable found.
//
// pCmap    start of the raw CMAP bytes; multi-byte fields are read
//          big-endian through GetUInt / Getsal_uInt16 / GetSShort
// nLength  number of valid bytes at pCmap
// rResult  reset on entry; on the final path it receives the arrays
//          allocated here (mpRangeCodes, mpStartGlyphs, mpGlyphIds) and
//          mnRangeCount
//
// Steps visible below: validate the header, score each subtable by its
// platform/encoding pair and keep the best format 4 or format 12 one, decode
// its segments into (first, end) code pairs where end is one past the last
// char, recode non-Unicode encodings to Unicode ranges, publish the arrays.
// NOTE(review): the embedded numbering of this listing has gaps (e.g. 103,
// 106, 110, 116-118, 153, 159, 167, 195, 199, 382), so the return
// statements and several declarations/statements are not visible here --
// verify against the complete file before changing any logic.
92 bool ParseCMAP( const unsigned char* pCmap
, int nLength
, CmapResult
& rResult
)
94 rResult
.mpRangeCodes
= nullptr;
95 rResult
.mpStartGlyphs
= nullptr;
96 rResult
.mpGlyphIds
= nullptr;
97 rResult
.mnRangeCount
= 0;
98 rResult
.mbRecoded
= false;
99 rResult
.mbSymbolic
= false;
101 // parse the table header and check for validity
102 if( !pCmap
|| (nLength
< 24) )
105 if( Getsal_uInt16( pCmap
) != 0x0000 ) // simple check for CMAP corruption
108 int nSubTables
= Getsal_uInt16( pCmap
+ 2 );
// each subtable record read by the loop below is 8 bytes (p += 8)
109 if( (nSubTables
<= 0) || (nLength
< (24 + 8*nSubTables
)) )
112 const unsigned char* pEndValidArea
= pCmap
+ nLength
;
114 // find the most interesting subtable in the CMAP
115 rtl_TextEncoding eRecodeFrom
= RTL_TEXTENCODING_UNICODE
;
119 for( const unsigned char* p
= pCmap
+ 4; --nSubTables
>= 0; p
+= 8 )
121 int nPlatform
= Getsal_uInt16( p
);
122 int nEncoding
= Getsal_uInt16( p
+2 );
// combined key: platform id in the high byte, encoding id in the low byte
123 int nPlatformEncoding
= (nPlatform
<< 8) + nEncoding
;
126 rtl_TextEncoding eTmpEncoding
= RTL_TEXTENCODING_UNICODE
;
// nValue is the preference score of the subtable; higher wins (see the
// nBestVal comparison below), 0 means "unknown encoding"
127 switch( nPlatformEncoding
)
129 case 0x000: nValue
= 20; break; // Unicode 1.0
130 case 0x001: nValue
= 21; break; // Unicode 1.1
131 case 0x002: nValue
= 22; break; // iso10646_1993
132 case 0x003: nValue
= 23; break; // UCS-2
133 case 0x004: nValue
= 24; break; // UCS-4
134 case 0x100: nValue
= 22; break; // Mac Unicode<2.0
135 case 0x103: nValue
= 23; break; // Mac Unicode>2.0
136 case 0x300: nValue
= 5; rResult
.mbSymbolic
= true; break; // Win Symbol
137 case 0x301: nValue
= 28; break; // Win UCS-2
138 case 0x30A: nValue
= 29; break; // Win-UCS-4
139 case 0x302: nValue
= 11; eTmpEncoding
= RTL_TEXTENCODING_SHIFT_JIS
; break;
140 case 0x303: nValue
= 12; eTmpEncoding
= RTL_TEXTENCODING_GB_18030
; break;
141 case 0x304: nValue
= 11; eTmpEncoding
= RTL_TEXTENCODING_BIG5
; break;
142 case 0x305: nValue
= 11; eTmpEncoding
= RTL_TEXTENCODING_MS_949
; break;
143 case 0x306: nValue
= 11; eTmpEncoding
= RTL_TEXTENCODING_MS_1361
; break;
144 default: nValue
= 0; break;
147 if( nValue
<= 0 ) // ignore unknown encodings
// NOTE(review): nTmpOffset is read from the font data and used to index
// pCmap in the very next statement; no bounds check against nLength is
// visible before that read -- confirm whether one is needed here.
150 int nTmpOffset
= GetUInt( p
+4 );
151 int nTmpFormat
= Getsal_uInt16( pCmap
+ nTmpOffset
);
152 if( nTmpFormat
== 12 ) // 32bit code -> glyph map format
154 else if( nTmpFormat
!= 4 ) // 16bit code -> glyph map format
155 continue; // ignore other formats
157 if( nBestVal
< nValue
)
160 nOffset
= nTmpOffset
;
161 nFormat
= nTmpFormat
;
162 eRecodeFrom
= eTmpEncoding
;
166 // parse the best CMAP subtable
168 sal_UCS4
* pCodePairs
= nullptr;
169 int* pStartGlyphs
= nullptr;
// glyph ids of segments that use the indirect glyphid-array; entry 0 is a
// placeholder so that a stored negative start index is never 0
171 std::vector
<sal_uInt16
> aGlyphIdArray
;
172 aGlyphIdArray
.reserve( 0x1000 );
173 aGlyphIdArray
.push_back( 0 );
175 // format 4, the most common 16bit char mapping table
176 if( (nFormat
== 4) && ((nOffset
+16) < nLength
) )
178 int nSegCountX2
= Getsal_uInt16( pCmap
+ nOffset
+ 6 );
// one segment fewer than stored is decoded
179 nRangeCount
= nSegCountX2
/2 - 1;
180 pCodePairs
= new sal_UCS4
[ nRangeCount
* 2 ];
181 pStartGlyphs
= new int[ nRangeCount
];
// the four parallel per-segment arrays: end codes, start codes, deltas,
// range offsets (a 2-byte gap follows the end-code array)
182 const unsigned char* pLimitBase
= pCmap
+ nOffset
+ 14;
183 const unsigned char* pBeginBase
= pLimitBase
+ nSegCountX2
+ 2;
184 const unsigned char* pDeltaBase
= pBeginBase
+ nSegCountX2
;
185 const unsigned char* pOffsetBase
= pDeltaBase
+ nSegCountX2
;
186 sal_UCS4
* pCP
= pCodePairs
;
187 for( int i
= 0; i
< nRangeCount
; ++i
)
189 const sal_UCS4 cMinChar
= Getsal_uInt16( pBeginBase
+ 2*i
);
190 const sal_UCS4 cMaxChar
= Getsal_uInt16( pLimitBase
+ 2*i
);
191 const int nGlyphDelta
= GetSShort( pDeltaBase
+ 2*i
);
192 const int nRangeOffset
= Getsal_uInt16( pOffsetBase
+ 2*i
);
193 if( cMinChar
> cMaxChar
) { // no sane font should trigger this
194 SAL_WARN("vcl.gdi", "Min char should never be more than the max char!");
197 if( cMaxChar
== 0xFFFF ) {
198 SAL_WARN("vcl.gdi", "Format 4 char should not be 0xFFFF");
201 if( !nRangeOffset
) {
202 // glyphid can be calculated directly
203 pStartGlyphs
[i
] = (cMinChar
+ nGlyphDelta
) & 0xFFFF;
205 // update the glyphid-array with the glyphs in this range
// a negative start value marks "look up in the glyphid array"; its
// magnitude is the array position of the segment's first glyph id
206 pStartGlyphs
[i
] = -(int)aGlyphIdArray
.size();
207 const unsigned char* pGlyphIdPtr
= pOffsetBase
+ 2*i
+ nRangeOffset
;
// remaining bytes of the table bound how many 2-byte ids can be read
208 const size_t nRemainingSize
= pEndValidArea
- pGlyphIdPtr
;
209 const size_t nMaxPossibleRecords
= nRemainingSize
/2;
210 if (nMaxPossibleRecords
== 0) { // no sane font should trigger this
211 SAL_WARN("vcl.gdi", "More indexes claimed that space available in font!");
214 const size_t nMaxLegalChar
= cMinChar
+ nMaxPossibleRecords
-1;
215 if (cMaxChar
> nMaxLegalChar
) { // no sane font should trigger this
216 SAL_WARN("vcl.gdi", "More indexes claimed that space available in font!");
219 for( sal_UCS4 c
= cMinChar
; c
<= cMaxChar
; ++c
, pGlyphIdPtr
+=2 ) {
220 const int nGlyphIndex
= Getsal_uInt16( pGlyphIdPtr
) + nGlyphDelta
;
221 aGlyphIdArray
.push_back( static_cast<sal_uInt16
>(nGlyphIndex
) );
// the end code of a pair is stored as last char + 1
225 *(pCP
++) = cMaxChar
+ 1;
// recompute the count from what was actually written
227 nRangeCount
= (pCP
- pCodePairs
) / 2;
229 // format 12, the most common 32bit char mapping table
230 else if( (nFormat
== 12) && ((nOffset
+16) < nLength
) )
// NOTE(review): the group count is a 32-bit value taken from the font data
// and sizes both allocations below; no upper bound or comparison against the
// remaining table length is visible here -- confirm.
232 nRangeCount
= GetUInt( pCmap
+ nOffset
+ 12 );
233 pCodePairs
= new sal_UCS4
[ nRangeCount
* 2 ];
234 pStartGlyphs
= new int[ nRangeCount
];
235 const unsigned char* pGroup
= pCmap
+ nOffset
+ 16;
236 sal_UCS4
* pCP
= pCodePairs
;
237 for( int i
= 0; i
< nRangeCount
; ++i
)
239 sal_UCS4 cMinChar
= GetUInt( pGroup
+ 0 );
240 sal_UCS4 cMaxChar
= GetUInt( pGroup
+ 4 );
241 int nGlyphId
= GetUInt( pGroup
+ 8 );
244 if( cMinChar
> cMaxChar
) { // no sane font should trigger this
245 SAL_WARN("vcl.gdi", "Min char should never be more than the max char!");
250 *(pCP
++) = cMaxChar
+ 1;
251 pStartGlyphs
[i
] = nGlyphId
;
253 nRangeCount
= (pCP
- pCodePairs
) / 2;
256 // check if any subtable resulted in something usable
257 if( nRangeCount
<= 0 )
260 delete[] pStartGlyphs
;
262 // even when no CMAP is available we know it for symbol fonts
263 if( rResult
.mbSymbolic
)
// same four codes as the static default symbol table, but heap-allocated
265 pCodePairs
= new sal_UCS4
[4];
266 pCodePairs
[0] = 0x0020; // aliased symbols
267 pCodePairs
[1] = 0x0100;
268 pCodePairs
[2] = 0xF020; // original symbols
269 pCodePairs
[3] = 0xF100;
270 rResult
.mpRangeCodes
= pCodePairs
;
271 rResult
.mnRangeCount
= 2;
278 // recode the code ranges to their unicode encoded ranges if needed
279 rtl_TextToUnicodeConverter aConverter
= nullptr;
// NOTE(review): declared as rtl_UnicodeToTextContext but assigned from
// rtl_createTextToUnicodeContext below -- the type name looks mismatched.
280 rtl_UnicodeToTextContext aCvtContext
= nullptr;
282 rResult
.mbRecoded
= ( eRecodeFrom
!= RTL_TEXTENCODING_UNICODE
);
283 if( rResult
.mbRecoded
)
285 aConverter
= rtl_createTextToUnicodeConverter( eRecodeFrom
);
286 aCvtContext
= rtl_createTextToUnicodeContext( aConverter
);
289 if( aConverter
&& aCvtContext
)
291 // determine the set of supported unicodes from encoded ranges
292 std::set
<sal_UCS4
> aSupportedUnicodes
;
294 static const int NINSIZE
= 64;
295 static const int NOUTSIZE
= 64;
296 sal_Char cCharsInp
[ NINSIZE
];
297 sal_Unicode cCharsOut
[ NOUTSIZE
];
298 sal_UCS4
* pCP
= pCodePairs
;
299 for( int i
= 0; i
< nRangeCount
; ++i
)
301 sal_UCS4 cMin
= *(pCP
++);
302 sal_UCS4 cEnd
= *(pCP
++);
// NOTE(review): the loop guard only requires j < NINSIZE, yet the body can
// store two bytes per code (high byte, then low byte). With j == NINSIZE-1
// the second store would land at cCharsInp[NINSIZE] -- verify against the
// complete file whether an elided statement prevents that.
306 for(; (cMin
< cEnd
) && (j
< NINSIZE
); ++cMin
)
309 cCharsInp
[ j
++ ] = static_cast<sal_Char
>(cMin
>> 8);
310 if( (cMin
>= 0x0100) || (cMin
< 0x00A0) )
311 cCharsInp
[ j
++ ] = static_cast<sal_Char
>(cMin
);
315 sal_Size nSrcCvtBytes
;
// invalid and undefined input sequences are skipped rather than reported
316 int nOutLen
= rtl_convertTextToUnicode(
317 aConverter
, aCvtContext
,
318 cCharsInp
, j
, cCharsOut
, NOUTSIZE
,
319 RTL_TEXTTOUNICODE_FLAGS_INVALID_IGNORE
320 | RTL_TEXTTOUNICODE_FLAGS_UNDEFINED_IGNORE
,
321 &nCvtInfo
, &nSrcCvtBytes
);
323 for( j
= 0; j
< nOutLen
; ++j
)
324 aSupportedUnicodes
.insert( cCharsOut
[j
] );
// NOTE(review): aCvtContext is a conversion context, but it is passed to
// rtl_destroyTextToUnicodeConverter here; the counterpart of
// rtl_createTextToUnicodeContext is presumably
// rtl_destroyTextToUnicodeContext( aConverter, aCvtContext ) -- verify
// against the rtl/textcvt.h API.
328 rtl_destroyTextToUnicodeConverter( aCvtContext
);
329 rtl_destroyTextToUnicodeConverter( aConverter
);
331 // convert the set of supported unicodes to ranges
332 std::vector
<sal_UCS4
> aSupportedRanges
;
// the set iterates in ascending order, so adjacent codes can be merged by
// comparing each code with the end code of the range built so far
334 std::set
<sal_UCS4
>::const_iterator itChar
= aSupportedUnicodes
.begin();
335 for(; itChar
!= aSupportedUnicodes
.end(); ++itChar
)
337 if( aSupportedRanges
.empty()
338 || (aSupportedRanges
.back() != *itChar
) )
340 // add new range beginning with current unicode
341 aSupportedRanges
.push_back( *itChar
);
342 aSupportedRanges
.push_back( 0 );
345 // extend existing range to include current unicode
346 aSupportedRanges
.back() = *itChar
+ 1;
349 // glyph mapping for non-unicode fonts not implemented
350 delete[] pStartGlyphs
;
351 pStartGlyphs
= nullptr;
352 aGlyphIdArray
.clear();
354 // make a pCodePairs array using the vector from above
356 nRangeCount
= aSupportedRanges
.size() / 2;
357 if( nRangeCount
<= 0 )
359 pCodePairs
= new sal_UCS4
[ nRangeCount
* 2 ];
360 std::vector
<sal_UCS4
>::const_iterator itInt
= aSupportedRanges
.begin();
361 for( pCP
= pCodePairs
; itInt
!= aSupportedRanges
.end(); ++itInt
)
365 // prepare the glyphid-array if needed
366 // TODO: merge ranges if they are close enough?
367 sal_uInt16
* pGlyphIds
= nullptr;
368 if( !aGlyphIdArray
.empty())
// copy the collected glyph ids into a plain array for the result struct
370 pGlyphIds
= new sal_uInt16
[ aGlyphIdArray
.size() ];
371 sal_uInt16
* pOut
= pGlyphIds
;
372 std::vector
<sal_uInt16
>::const_iterator it
= aGlyphIdArray
.begin();
373 while( it
!= aGlyphIdArray
.end() )
377 // update the result struct
378 rResult
.mpRangeCodes
= pCodePairs
;
379 rResult
.mpStartGlyphs
= pStartGlyphs
;
380 rResult
.mnRangeCount
= nRangeCount
;
381 rResult
.mpGlyphIds
= pGlyphIds
;
385 FontCharMap::FontCharMap()
386 : mpImplFontCharMap( ImplFontCharMap::getDefaultMap() )
390 FontCharMap::FontCharMap( ImplFontCharMapRef
const & pIFCMap
)
391 : mpImplFontCharMap( pIFCMap
)
395 FontCharMap::FontCharMap( const CmapResult
& rCR
)
396 : mpImplFontCharMap(new ImplFontCharMap(rCR
))
400 FontCharMap::~FontCharMap()
402 mpImplFontCharMap
= nullptr;
405 FontCharMapRef
FontCharMap::GetDefaultMap( bool bSymbol
)
407 FontCharMapRef
xFontCharMap( new FontCharMap( ImplFontCharMap::getDefaultMap( bSymbol
) ) );
411 bool FontCharMap::IsDefaultMap() const
413 return mpImplFontCharMap
->isDefaultMap();
416 int FontCharMap::GetCharCount() const
418 return mpImplFontCharMap
->mnCharCount
;
// Counts the mapped characters between cMin and cMax.
// The visible code looks up the range-table index of both bounds with
// findRangeIndex(), sums the sizes (end - first) of the pairs from nRangeMin
// to nRangeMax in steps of two, and subtracts the parts of the first and
// last pair that lie outside the requested bounds.
// NOTE(review): the embedded numbering skips 422-424, 427-428, 431,
// 434-436, 438 and 442-445, so the declaration of nCount, the conditions in
// front of the visible else-branch and adjustment, and the return statement
// are not visible in this listing -- verify against the complete file.
421 int FontCharMap::CountCharsInRange( sal_UCS4 cMin
, sal_UCS4 cMax
) const
425 // find and adjust range and char count for cMin
426 int nRangeMin
= findRangeIndex( cMin
);
// cMin lies inside a pair: discount the chars of that pair below cMin
429 else if( cMin
> mpImplFontCharMap
->mpRangeCodes
[ nRangeMin
] )
430 nCount
-= cMin
- mpImplFontCharMap
->mpRangeCodes
[ nRangeMin
];
432 // find and adjust range and char count for cMax
433 int nRangeMax
= findRangeIndex( cMax
);
// discount the chars of the last pair above cMax (end code is exclusive)
437 nCount
-= mpImplFontCharMap
->mpRangeCodes
[ nRangeMax
+1 ] - cMax
- 1;
439 // count chars in complete ranges between cMin and cMax
440 for( int i
= nRangeMin
; i
<= nRangeMax
; i
+=2 )
441 nCount
+= mpImplFontCharMap
->mpRangeCodes
[i
+1] - mpImplFontCharMap
->mpRangeCodes
[i
];
// Tells whether cChar is covered by this map.
// Without glyph data (mpStartGlyphs == nullptr) the decision is made from
// the range table alone: an even index from findRangeIndex() means the code
// lies inside a range. With glyph data the character counts as present when
// GetGlyphIndex() yields a non-zero glyph.
// NOTE(review): the embedded numbering skips 447, 449, 453 and 458-462, so
// the statement executed for a code below the first range and the final
// return are not visible in this listing -- verify against the complete
// file.
446 bool FontCharMap::HasChar( sal_UCS4 cChar
) const
448 bool bHasChar
= false;
450 if( mpImplFontCharMap
->mpStartGlyphs
== nullptr ) { // only the char-ranges are known
451 const int nRange
= findRangeIndex( cChar
);
// index 0 is also what a code below the first range start yields
452 if( nRange
==0 && cChar
< mpImplFontCharMap
->mpRangeCodes
[0] )
454 bHasChar
= ((nRange
& 1) == 0); // inside a range
455 } else { // glyph mapping is available
456 const int nGlyphIndex
= GetGlyphIndex( cChar
);
457 bHasChar
= (nGlyphIndex
!= 0); // not the notdef-glyph
463 sal_UCS4
FontCharMap::GetFirstChar() const
465 return mpImplFontCharMap
->mpRangeCodes
[0];
468 sal_UCS4
FontCharMap::GetLastChar() const
470 return (mpImplFontCharMap
->mpRangeCodes
[ 2*mpImplFontCharMap
->mnRangeCount
-1 ] - 1);
// Returns the mapped character that follows cChar.
// Codes below the first character yield GetFirstChar(); codes at or beyond
// the last character yield GetLastChar(). Otherwise cChar + 1 is looked up:
// an odd index from findRangeIndex() means it falls between two ranges, and
// the start of the following range is returned.
// NOTE(review): the embedded numbering skips 483-485, so the return for the
// "inside a range" case is not visible in this listing -- verify against the
// complete file.
473 sal_UCS4
FontCharMap::GetNextChar( sal_UCS4 cChar
) const
475 if( cChar
< GetFirstChar() )
476 return GetFirstChar();
477 if( cChar
>= GetLastChar() )
478 return GetLastChar();
480 int nRange
= findRangeIndex( cChar
+ 1 );
481 if( nRange
& 1 ) // outside of range?
482 return mpImplFontCharMap
->mpRangeCodes
[ nRange
+ 1 ]; // => first in next range
// Returns the mapped character that precedes cChar.
// Codes at or below the first character yield GetFirstChar(); codes beyond
// the last character yield GetLastChar(). Otherwise cChar - 1 is looked up:
// an odd index from findRangeIndex() means it falls between two ranges, and
// the last character of the preceding range (its end code minus one) is
// returned.
// NOTE(review): the embedded numbering skips 496-498, so the return for the
// "inside a range" case is not visible in this listing -- verify against the
// complete file.
486 sal_UCS4
FontCharMap::GetPrevChar( sal_UCS4 cChar
) const
488 if( cChar
<= GetFirstChar() )
489 return GetFirstChar();
490 if( cChar
> GetLastChar() )
491 return GetLastChar();
493 int nRange
= findRangeIndex( cChar
- 1 );
494 if( nRange
& 1 ) // outside a range?
495 return (mpImplFontCharMap
->mpRangeCodes
[ nRange
] - 1); // => last in prev range
// Maps a character code to its ordinal position among all mapped
// characters by walking the (first, end) pairs linearly: the sizes of the
// ranges already passed accumulate in nCharIndex, and for the range that
// contains cChar the offset from the range start is added.
// NOTE(review): the embedded numbering skips 500, 502, 505, 508 and
// 512-518, so the declaration of nCharIndex, the condition in front of the
// accumulation, and the result for an unmapped character are not visible in
// this listing -- verify against the complete file.
499 int FontCharMap::GetIndexFromChar( sal_UCS4 cChar
) const
501 // TODO: improve linear walk?
503 const sal_UCS4
* pRange
= &mpImplFontCharMap
->mpRangeCodes
[0];
504 for( int i
= 0; i
< mpImplFontCharMap
->mnRangeCount
; ++i
)
506 sal_UCS4 cFirst
= *(pRange
++);
507 sal_UCS4 cLast
= *(pRange
++);
// cLast is the pair's end code, so the difference is the range size
509 nCharIndex
+= cLast
- cFirst
;
510 else if( cChar
>= cFirst
)
511 return nCharIndex
+ (cChar
- cFirst
);
// Maps an ordinal position among all mapped characters back to a character
// code by walking the (first, end) pairs linearly and subtracting each
// range's size from nIndex; the visible return computes the result relative
// to the current pair's end code. An index that no range accounts for falls
// through to the first code of the table.
// NOTE(review): the embedded numbering skips 520, 524, 528 and 530-531, so
// the condition guarding the return inside the loop is not visible in this
// listing -- verify against the complete file.
519 sal_UCS4
FontCharMap::GetCharFromIndex( int nIndex
) const
521 // TODO: improve linear walk?
522 const sal_UCS4
* pRange
= &mpImplFontCharMap
->mpRangeCodes
[0];
523 for( int i
= 0; i
< mpImplFontCharMap
->mnRangeCount
; ++i
)
525 sal_UCS4 cFirst
= *(pRange
++);
526 sal_UCS4 cLast
= *(pRange
++);
527 nIndex
-= cLast
- cFirst
;
529 return (cLast
+ nIndex
);
532 // we can only get here with an out-of-bounds charindex
533 return mpImplFontCharMap
->mpRangeCodes
[0];
// Searches the flat range table (2 * mnRangeCount codes) for cChar by
// repeatedly comparing it with the code at the midpoint nMid and narrowing
// the nLower..nUpper window. Callers in this file treat an even result as
// "inside a range" and an odd result as "between ranges".
// NOTE(review): the embedded numbering skips 537-538, 542, 544-546 and
// 548-552, so the declaration of nLower, the window updates and the return
// statement are not visible in this listing -- verify against the complete
// file.
536 int FontCharMap::findRangeIndex( sal_UCS4 cChar
) const
539 int nMid
= mpImplFontCharMap
->mnRangeCount
;
540 int nUpper
= 2 * mpImplFontCharMap
->mnRangeCount
- 1;
541 while( nLower
< nUpper
)
543 if( cChar
>= mpImplFontCharMap
->mpRangeCodes
[ nMid
] )
// midpoint rounded up
547 nMid
= (nLower
+ nUpper
+ 1) / 2;
// Looks up the glyph index for cChar.
// Per the comments below, -1 is the answer when no glyph data is available
// and 0 when the character has no matching glyph. For a code inside a range
// the offset from the range start is combined with the range's entry in
// mpStartGlyphs: a non-negative entry is added directly, a negative entry
// redirects the lookup into mpGlyphIds.
// NOTE(review): the embedded numbering skips 554, 557-558, 566-567, 569,
// 572-574, 577-578, 585 and 588-592, so the return statements and the
// symbol-aliasing adjustment of cChar are not visible in this listing --
// verify against the complete file.
553 int FontCharMap::GetGlyphIndex( sal_UCS4 cChar
) const
555 // return -1 if the object doesn't know the glyph ids
556 if( !mpImplFontCharMap
->mpStartGlyphs
)
559 // return 0 if the unicode doesn't have a matching glyph
560 int nRange
= findRangeIndex( cChar
);
561 // check that we are inside any range
562 if( (nRange
== 0) && (cChar
< mpImplFontCharMap
->mpRangeCodes
[0]) ) {
563 // symbol aliasing gives symbol fonts a second chance
// applies only to codes up to 0xFF when the first range lies in
// 0xF000..0xF0FF
564 const bool bSymbolic
= cChar
<= 0xFF && (mpImplFontCharMap
->mpRangeCodes
[0]>=0xF000) &&
565 (mpImplFontCharMap
->mpRangeCodes
[1]<=0xF0FF);
568 // check for symbol aliasing (U+F0xx -> U+00xx)
570 nRange
= findRangeIndex( cChar
);
571 if( (nRange
== 0) && (cChar
< mpImplFontCharMap
->mpRangeCodes
[0]) ) {
575 // check that we are inside a range
576 if( (nRange
& 1) != 0 )
579 // get glyph index directly or indirectly
// offset of cChar from the start of its range
580 int nGlyphIndex
= cChar
- mpImplFontCharMap
->mpRangeCodes
[ nRange
];
// one mpStartGlyphs entry per (first, end) pair, hence nRange/2
581 const int nStartIndex
= mpImplFontCharMap
->mpStartGlyphs
[ nRange
/2 ];
582 if( nStartIndex
>= 0 ) {
583 // the glyph index can be calculated
584 nGlyphIndex
+= nStartIndex
;
586 // the glyphid array has the glyph index
// nStartIndex is negative here, so subtracting it adds its magnitude
587 nGlyphIndex
= mpImplFontCharMap
->mpGlyphIds
[ nGlyphIndex
- nStartIndex
];
593 /* vim:set shiftwidth=4 softtabstop=4 expandtab: */