/*
 * This file is part of the LibreOffice project.
 *
 * This Source Code Form is subject to the terms of the Mozilla Public
 * License, v. 2.0. If a copy of the MPL was not distributed with this
 * file, You can obtain one at http://mozilla.org/MPL/2.0/.
 *
 * This file incorporates work covered by the following license notice:
 *
 *   Licensed to the Apache Software Foundation (ASF) under one or more
 *   contributor license agreements. See the NOTICE file distributed
 *   with this work for additional information regarding copyright
 *   ownership. The ASF licenses this file to you under the Apache
 *   License, Version 2.0 (the "License"); you may not use this file
 *   except in compliance with the License. You may obtain a copy of
 *   the License at http://www.apache.org/licenses/LICENSE-2.0 .
 */

#include <vcl/fontcharmap.hxx>
#include <fontinstance.hxx>
#include <impfontcharmap.hxx>

#include <algorithm>
#include <set>
#include <vector>

25 CmapResult::CmapResult( bool bSymbolic
,
26 const sal_UCS4
* pRangeCodes
, int nRangeCount
)
27 : mpRangeCodes( pRangeCodes
)
28 , mpStartGlyphs( nullptr)
29 , mpGlyphIds( nullptr)
30 , mnRangeCount( nRangeCount
)
31 , mbSymbolic( bSymbolic
)
35 static ImplFontCharMapRef xDefaultImplFontCharMap
;
36 static const sal_UCS4 aDefaultUnicodeRanges
[] = {0x0020,0xD800, 0xE000,0xFFF0};
37 static const sal_UCS4 aDefaultSymbolRanges
[] = {0x0020,0x0100, 0xF020,0xF100};
39 ImplFontCharMap::~ImplFontCharMap()
43 delete[] mpRangeCodes
;
44 delete[] mpStartGlyphs
;
48 ImplFontCharMap::ImplFontCharMap( const CmapResult
& rCR
)
49 : mpRangeCodes( rCR
.mpRangeCodes
)
50 , mpStartGlyphs( rCR
.mpStartGlyphs
)
51 , mpGlyphIds( rCR
.mpGlyphIds
)
52 , mnRangeCount( rCR
.mnRangeCount
)
55 const sal_UCS4
* pRangePtr
= mpRangeCodes
;
56 for( int i
= mnRangeCount
; --i
>= 0; pRangePtr
+= 2 )
58 sal_UCS4 cFirst
= pRangePtr
[0];
59 sal_UCS4 cLast
= pRangePtr
[1];
60 mnCharCount
+= cLast
- cFirst
;
64 ImplFontCharMapRef
const & ImplFontCharMap::getDefaultMap( bool bSymbols
)
66 const sal_UCS4
* pRangeCodes
= aDefaultUnicodeRanges
;
67 int nCodesCount
= sizeof(aDefaultUnicodeRanges
) / sizeof(*pRangeCodes
);
70 pRangeCodes
= aDefaultSymbolRanges
;
71 nCodesCount
= sizeof(aDefaultSymbolRanges
) / sizeof(*pRangeCodes
);
74 CmapResult
aDefaultCR( bSymbols
, pRangeCodes
, nCodesCount
/2 );
75 xDefaultImplFontCharMap
= ImplFontCharMapRef(new ImplFontCharMap(aDefaultCR
));
77 return xDefaultImplFontCharMap
;
80 bool ImplFontCharMap::isDefaultMap() const
82 const bool bIsDefault
= (mpRangeCodes
== aDefaultUnicodeRanges
) || (mpRangeCodes
== aDefaultSymbolRanges
);
// Reads a big-endian 32 bit unsigned value from the four bytes at p.
static unsigned GetUInt( const unsigned char* p )
{
    // widen before shifting: a plain p[0] promotes to (signed) int and
    // "<< 24" would overflow it for bytes >= 0x80
    return (static_cast<unsigned>(p[0]) << 24)
         | (static_cast<unsigned>(p[1]) << 16)
         | (static_cast<unsigned>(p[2]) << 8)
         |  static_cast<unsigned>(p[3]);
}
// Reads a big-endian 16 bit unsigned value from the two bytes at p.
static unsigned Getsal_uInt16( const unsigned char* p )
{
    return (static_cast<unsigned>(p[0]) << 8) | static_cast<unsigned>(p[1]);
}
88 static int GetSShort( const unsigned char* p
){ return static_cast<sal_Int16
>((p
[0]<<8)|p
[1]);}
90 // TODO: move CMAP parsing directly into the ImplFontCharMap class
91 bool ParseCMAP( const unsigned char* pCmap
, int nLength
, CmapResult
& rResult
)
93 rResult
.mpRangeCodes
= nullptr;
94 rResult
.mpStartGlyphs
= nullptr;
95 rResult
.mpGlyphIds
= nullptr;
96 rResult
.mnRangeCount
= 0;
97 rResult
.mbRecoded
= false;
98 rResult
.mbSymbolic
= false;
100 // parse the table header and check for validity
101 if( !pCmap
|| (nLength
< 24) )
104 if( Getsal_uInt16( pCmap
) != 0x0000 ) // simple check for CMAP corruption
107 int nSubTables
= Getsal_uInt16( pCmap
+ 2 );
108 if( (nSubTables
<= 0) || (nLength
< (24 + 8*nSubTables
)) )
111 const unsigned char* pEndValidArea
= pCmap
+ nLength
;
113 // find the most interesting subtable in the CMAP
114 rtl_TextEncoding eRecodeFrom
= RTL_TEXTENCODING_UNICODE
;
118 for( const unsigned char* p
= pCmap
+ 4; --nSubTables
>= 0; p
+= 8 )
120 int nPlatform
= Getsal_uInt16( p
);
121 int nEncoding
= Getsal_uInt16( p
+2 );
122 int nPlatformEncoding
= (nPlatform
<< 8) + nEncoding
;
125 rtl_TextEncoding eTmpEncoding
= RTL_TEXTENCODING_UNICODE
;
126 switch( nPlatformEncoding
)
128 case 0x000: nValue
= 20; break; // Unicode 1.0
129 case 0x001: nValue
= 21; break; // Unicode 1.1
130 case 0x002: nValue
= 22; break; // iso10646_1993
131 case 0x003: nValue
= 23; break; // UCS-2
132 case 0x004: nValue
= 24; break; // UCS-4
133 case 0x100: nValue
= 22; break; // Mac Unicode<2.0
134 case 0x103: nValue
= 23; break; // Mac Unicode>2.0
135 case 0x300: nValue
= 5; rResult
.mbSymbolic
= true; break; // Win Symbol
136 case 0x301: nValue
= 28; break; // Win UCS-2
137 case 0x30A: nValue
= 29; break; // Win-UCS-4
138 case 0x302: nValue
= 11; eTmpEncoding
= RTL_TEXTENCODING_SHIFT_JIS
; break;
139 case 0x303: nValue
= 12; eTmpEncoding
= RTL_TEXTENCODING_GB_18030
; break;
140 case 0x304: nValue
= 11; eTmpEncoding
= RTL_TEXTENCODING_BIG5
; break;
141 case 0x305: nValue
= 11; eTmpEncoding
= RTL_TEXTENCODING_MS_949
; break;
142 case 0x306: nValue
= 11; eTmpEncoding
= RTL_TEXTENCODING_MS_1361
; break;
143 default: nValue
= 0; break;
146 if( nValue
<= 0 ) // ignore unknown encodings
149 int nTmpOffset
= GetUInt( p
+4 );
150 int nTmpFormat
= Getsal_uInt16( pCmap
+ nTmpOffset
);
151 if( nTmpFormat
== 12 ) // 32bit code -> glyph map format
153 else if( nTmpFormat
!= 4 ) // 16bit code -> glyph map format
154 continue; // ignore other formats
156 if( nBestVal
< nValue
)
159 nOffset
= nTmpOffset
;
160 nFormat
= nTmpFormat
;
161 eRecodeFrom
= eTmpEncoding
;
165 // parse the best CMAP subtable
167 sal_UCS4
* pCodePairs
= nullptr;
168 int* pStartGlyphs
= nullptr;
170 std::vector
<sal_uInt16
> aGlyphIdArray
;
171 aGlyphIdArray
.reserve( 0x1000 );
172 aGlyphIdArray
.push_back( 0 );
174 // format 4, the most common 16bit char mapping table
175 if( (nFormat
== 4) && ((nOffset
+16) < nLength
) )
177 int nSegCountX2
= Getsal_uInt16( pCmap
+ nOffset
+ 6 );
178 nRangeCount
= nSegCountX2
/2 - 1;
179 pCodePairs
= new sal_UCS4
[ nRangeCount
* 2 ];
180 pStartGlyphs
= new int[ nRangeCount
];
181 const unsigned char* pLimitBase
= pCmap
+ nOffset
+ 14;
182 const unsigned char* pBeginBase
= pLimitBase
+ nSegCountX2
+ 2;
183 const unsigned char* pDeltaBase
= pBeginBase
+ nSegCountX2
;
184 const unsigned char* pOffsetBase
= pDeltaBase
+ nSegCountX2
;
185 sal_UCS4
* pCP
= pCodePairs
;
186 for( int i
= 0; i
< nRangeCount
; ++i
)
188 const sal_UCS4 cMinChar
= Getsal_uInt16( pBeginBase
+ 2*i
);
189 const sal_UCS4 cMaxChar
= Getsal_uInt16( pLimitBase
+ 2*i
);
190 const int nGlyphDelta
= GetSShort( pDeltaBase
+ 2*i
);
191 const int nRangeOffset
= Getsal_uInt16( pOffsetBase
+ 2*i
);
192 if( cMinChar
> cMaxChar
) { // no sane font should trigger this
193 SAL_WARN("vcl.gdi", "Min char should never be more than the max char!");
196 if( cMaxChar
== 0xFFFF ) {
197 SAL_WARN("vcl.gdi", "Format 4 char should not be 0xFFFF");
200 if( !nRangeOffset
) {
201 // glyphid can be calculated directly
202 pStartGlyphs
[i
] = (cMinChar
+ nGlyphDelta
) & 0xFFFF;
204 // update the glyphid-array with the glyphs in this range
205 pStartGlyphs
[i
] = -(int)aGlyphIdArray
.size();
206 const unsigned char* pGlyphIdPtr
= pOffsetBase
+ 2*i
+ nRangeOffset
;
207 const size_t nRemainingSize
= pEndValidArea
- pGlyphIdPtr
;
208 const size_t nMaxPossibleRecords
= nRemainingSize
/2;
209 if (nMaxPossibleRecords
== 0) { // no sane font should trigger this
210 SAL_WARN("vcl.gdi", "More indexes claimed that space available in font!");
213 const size_t nMaxLegalChar
= cMinChar
+ nMaxPossibleRecords
-1;
214 if (cMaxChar
> nMaxLegalChar
) { // no sane font should trigger this
215 SAL_WARN("vcl.gdi", "More indexes claimed that space available in font!");
218 for( sal_UCS4 c
= cMinChar
; c
<= cMaxChar
; ++c
, pGlyphIdPtr
+=2 ) {
219 const int nGlyphIndex
= Getsal_uInt16( pGlyphIdPtr
) + nGlyphDelta
;
220 aGlyphIdArray
.push_back( static_cast<sal_uInt16
>(nGlyphIndex
) );
224 *(pCP
++) = cMaxChar
+ 1;
226 nRangeCount
= (pCP
- pCodePairs
) / 2;
228 // format 12, the most common 32bit char mapping table
229 else if( (nFormat
== 12) && ((nOffset
+16) < nLength
) )
231 nRangeCount
= GetUInt( pCmap
+ nOffset
+ 12 );
232 pCodePairs
= new sal_UCS4
[ nRangeCount
* 2 ];
233 pStartGlyphs
= new int[ nRangeCount
];
234 const unsigned char* pGroup
= pCmap
+ nOffset
+ 16;
235 sal_UCS4
* pCP
= pCodePairs
;
236 for( int i
= 0; i
< nRangeCount
; ++i
)
238 sal_UCS4 cMinChar
= GetUInt( pGroup
+ 0 );
239 sal_UCS4 cMaxChar
= GetUInt( pGroup
+ 4 );
240 int nGlyphId
= GetUInt( pGroup
+ 8 );
243 if( cMinChar
> cMaxChar
) { // no sane font should trigger this
244 SAL_WARN("vcl.gdi", "Min char should never be more than the max char!");
249 *(pCP
++) = cMaxChar
+ 1;
250 pStartGlyphs
[i
] = nGlyphId
;
252 nRangeCount
= (pCP
- pCodePairs
) / 2;
255 // check if any subtable resulted in something usable
256 if( nRangeCount
<= 0 )
259 delete[] pStartGlyphs
;
261 // even when no CMAP is available we know it for symbol fonts
262 if( rResult
.mbSymbolic
)
264 pCodePairs
= new sal_UCS4
[4];
265 pCodePairs
[0] = 0x0020; // aliased symbols
266 pCodePairs
[1] = 0x0100;
267 pCodePairs
[2] = 0xF020; // original symbols
268 pCodePairs
[3] = 0xF100;
269 rResult
.mpRangeCodes
= pCodePairs
;
270 rResult
.mnRangeCount
= 2;
277 // recode the code ranges to their unicode encoded ranges if needed
278 rtl_TextToUnicodeConverter aConverter
= nullptr;
279 rtl_UnicodeToTextContext aCvtContext
= nullptr;
281 rResult
.mbRecoded
= ( eRecodeFrom
!= RTL_TEXTENCODING_UNICODE
);
282 if( rResult
.mbRecoded
)
284 aConverter
= rtl_createTextToUnicodeConverter( eRecodeFrom
);
285 aCvtContext
= rtl_createTextToUnicodeContext( aConverter
);
288 if( aConverter
&& aCvtContext
)
290 // determine the set of supported code points from encoded ranges
291 std::set
<sal_UCS4
> aSupportedCodePoints
;
293 static const int NINSIZE
= 64;
294 static const int NOUTSIZE
= 64;
295 sal_Char cCharsInp
[ NINSIZE
];
296 sal_Unicode cCharsOut
[ NOUTSIZE
];
297 sal_UCS4
* pCP
= pCodePairs
;
298 for( int i
= 0; i
< nRangeCount
; ++i
)
300 sal_UCS4 cMin
= *(pCP
++);
301 sal_UCS4 cEnd
= *(pCP
++);
305 for(; (cMin
< cEnd
) && (j
< NINSIZE
); ++cMin
)
308 cCharsInp
[ j
++ ] = static_cast<sal_Char
>(cMin
>> 8);
309 if( (cMin
>= 0x0100) || (cMin
< 0x00A0) )
310 cCharsInp
[ j
++ ] = static_cast<sal_Char
>(cMin
);
314 sal_Size nSrcCvtBytes
;
315 int nOutLen
= rtl_convertTextToUnicode(
316 aConverter
, aCvtContext
,
317 cCharsInp
, j
, cCharsOut
, NOUTSIZE
,
318 RTL_TEXTTOUNICODE_FLAGS_INVALID_IGNORE
319 | RTL_TEXTTOUNICODE_FLAGS_UNDEFINED_IGNORE
,
320 &nCvtInfo
, &nSrcCvtBytes
);
322 for( j
= 0; j
< nOutLen
; ++j
)
323 aSupportedCodePoints
.insert( cCharsOut
[j
] );
327 rtl_destroyTextToUnicodeConverter( aCvtContext
);
328 rtl_destroyTextToUnicodeConverter( aConverter
);
330 // convert the set of supported code points to ranges
331 std::vector
<sal_UCS4
> aSupportedRanges
;
333 std::set
<sal_UCS4
>::const_iterator itChar
= aSupportedCodePoints
.begin();
334 for(; itChar
!= aSupportedCodePoints
.end(); ++itChar
)
336 if( aSupportedRanges
.empty()
337 || (aSupportedRanges
.back() != *itChar
) )
339 // add new range beginning with current unicode
340 aSupportedRanges
.push_back( *itChar
);
341 aSupportedRanges
.push_back( 0 );
344 // extend existing range to include current unicode
345 aSupportedRanges
.back() = *itChar
+ 1;
348 // glyph mapping for non-unicode fonts not implemented
349 delete[] pStartGlyphs
;
350 pStartGlyphs
= nullptr;
351 aGlyphIdArray
.clear();
353 // make a pCodePairs array using the vector from above
355 nRangeCount
= aSupportedRanges
.size() / 2;
356 if( nRangeCount
<= 0 )
358 pCodePairs
= new sal_UCS4
[ nRangeCount
* 2 ];
359 std::vector
<sal_UCS4
>::const_iterator itInt
= aSupportedRanges
.begin();
360 for( pCP
= pCodePairs
; itInt
!= aSupportedRanges
.end(); ++itInt
)
364 // prepare the glyphid-array if needed
365 // TODO: merge ranges if they are close enough?
366 sal_uInt16
* pGlyphIds
= nullptr;
367 if( !aGlyphIdArray
.empty())
369 pGlyphIds
= new sal_uInt16
[ aGlyphIdArray
.size() ];
370 sal_uInt16
* pOut
= pGlyphIds
;
371 std::vector
<sal_uInt16
>::const_iterator it
= aGlyphIdArray
.begin();
372 while( it
!= aGlyphIdArray
.end() )
376 // update the result struct
377 rResult
.mpRangeCodes
= pCodePairs
;
378 rResult
.mpStartGlyphs
= pStartGlyphs
;
379 rResult
.mnRangeCount
= nRangeCount
;
380 rResult
.mpGlyphIds
= pGlyphIds
;
384 FontCharMap::FontCharMap()
385 : mpImplFontCharMap( ImplFontCharMap::getDefaultMap() )
389 FontCharMap::FontCharMap( ImplFontCharMapRef
const & pIFCMap
)
390 : mpImplFontCharMap( pIFCMap
)
394 FontCharMap::FontCharMap( const CmapResult
& rCR
)
395 : mpImplFontCharMap(new ImplFontCharMap(rCR
))
399 FontCharMap::~FontCharMap()
401 mpImplFontCharMap
= nullptr;
404 FontCharMapRef
FontCharMap::GetDefaultMap( bool bSymbol
)
406 FontCharMapRef
xFontCharMap( new FontCharMap( ImplFontCharMap::getDefaultMap( bSymbol
) ) );
410 bool FontCharMap::IsDefaultMap() const
412 return mpImplFontCharMap
->isDefaultMap();
415 int FontCharMap::GetCharCount() const
417 return mpImplFontCharMap
->mnCharCount
;
420 int FontCharMap::CountCharsInRange( sal_UCS4 cMin
, sal_UCS4 cMax
) const
424 // find and adjust range and char count for cMin
425 int nRangeMin
= findRangeIndex( cMin
);
428 else if( cMin
> mpImplFontCharMap
->mpRangeCodes
[ nRangeMin
] )
429 nCount
-= cMin
- mpImplFontCharMap
->mpRangeCodes
[ nRangeMin
];
431 // find and adjust range and char count for cMax
432 int nRangeMax
= findRangeIndex( cMax
);
436 nCount
-= mpImplFontCharMap
->mpRangeCodes
[ nRangeMax
+1 ] - cMax
- 1;
438 // count chars in complete ranges between cMin and cMax
439 for( int i
= nRangeMin
; i
<= nRangeMax
; i
+=2 )
440 nCount
+= mpImplFontCharMap
->mpRangeCodes
[i
+1] - mpImplFontCharMap
->mpRangeCodes
[i
];
445 bool FontCharMap::HasChar( sal_UCS4 cChar
) const
447 bool bHasChar
= false;
449 if( mpImplFontCharMap
->mpStartGlyphs
== nullptr ) { // only the char-ranges are known
450 const int nRange
= findRangeIndex( cChar
);
451 if( nRange
==0 && cChar
< mpImplFontCharMap
->mpRangeCodes
[0] )
453 bHasChar
= ((nRange
& 1) == 0); // inside a range
454 } else { // glyph mapping is available
455 const int nGlyphIndex
= GetGlyphIndex( cChar
);
456 bHasChar
= (nGlyphIndex
!= 0); // not the notdef-glyph
462 sal_UCS4
FontCharMap::GetFirstChar() const
464 return mpImplFontCharMap
->mpRangeCodes
[0];
467 sal_UCS4
FontCharMap::GetLastChar() const
469 return (mpImplFontCharMap
->mpRangeCodes
[ 2*mpImplFontCharMap
->mnRangeCount
-1 ] - 1);
472 sal_UCS4
FontCharMap::GetNextChar( sal_UCS4 cChar
) const
474 if( cChar
< GetFirstChar() )
475 return GetFirstChar();
476 if( cChar
>= GetLastChar() )
477 return GetLastChar();
479 int nRange
= findRangeIndex( cChar
+ 1 );
480 if( nRange
& 1 ) // outside of range?
481 return mpImplFontCharMap
->mpRangeCodes
[ nRange
+ 1 ]; // => first in next range
485 sal_UCS4
FontCharMap::GetPrevChar( sal_UCS4 cChar
) const
487 if( cChar
<= GetFirstChar() )
488 return GetFirstChar();
489 if( cChar
> GetLastChar() )
490 return GetLastChar();
492 int nRange
= findRangeIndex( cChar
- 1 );
493 if( nRange
& 1 ) // outside a range?
494 return (mpImplFontCharMap
->mpRangeCodes
[ nRange
] - 1); // => last in prev range
498 int FontCharMap::GetIndexFromChar( sal_UCS4 cChar
) const
500 // TODO: improve linear walk?
502 const sal_UCS4
* pRange
= &mpImplFontCharMap
->mpRangeCodes
[0];
503 for( int i
= 0; i
< mpImplFontCharMap
->mnRangeCount
; ++i
)
505 sal_UCS4 cFirst
= *(pRange
++);
506 sal_UCS4 cLast
= *(pRange
++);
508 nCharIndex
+= cLast
- cFirst
;
509 else if( cChar
>= cFirst
)
510 return nCharIndex
+ (cChar
- cFirst
);
518 sal_UCS4
FontCharMap::GetCharFromIndex( int nIndex
) const
520 // TODO: improve linear walk?
521 const sal_UCS4
* pRange
= &mpImplFontCharMap
->mpRangeCodes
[0];
522 for( int i
= 0; i
< mpImplFontCharMap
->mnRangeCount
; ++i
)
524 sal_UCS4 cFirst
= *(pRange
++);
525 sal_UCS4 cLast
= *(pRange
++);
526 nIndex
-= cLast
- cFirst
;
528 return (cLast
+ nIndex
);
531 // we can only get here with an out-of-bounds charindex
532 return mpImplFontCharMap
->mpRangeCodes
[0];
535 int FontCharMap::findRangeIndex( sal_UCS4 cChar
) const
538 int nMid
= mpImplFontCharMap
->mnRangeCount
;
539 int nUpper
= 2 * mpImplFontCharMap
->mnRangeCount
- 1;
540 while( nLower
< nUpper
)
542 if( cChar
>= mpImplFontCharMap
->mpRangeCodes
[ nMid
] )
546 nMid
= (nLower
+ nUpper
+ 1) / 2;
552 int FontCharMap::GetGlyphIndex( sal_UCS4 cChar
) const
554 // return -1 if the object doesn't know the glyph ids
555 if( !mpImplFontCharMap
->mpStartGlyphs
)
558 // return 0 if the unicode doesn't have a matching glyph
559 int nRange
= findRangeIndex( cChar
);
560 // check that we are inside any range
561 if( (nRange
== 0) && (cChar
< mpImplFontCharMap
->mpRangeCodes
[0]) ) {
562 // symbol aliasing gives symbol fonts a second chance
563 const bool bSymbolic
= cChar
<= 0xFF && (mpImplFontCharMap
->mpRangeCodes
[0]>=0xF000) &&
564 (mpImplFontCharMap
->mpRangeCodes
[1]<=0xF0FF);
567 // check for symbol aliasing (U+F0xx -> U+00xx)
569 nRange
= findRangeIndex( cChar
);
570 if( (nRange
== 0) && (cChar
< mpImplFontCharMap
->mpRangeCodes
[0]) ) {
574 // check that we are inside a range
575 if( (nRange
& 1) != 0 )
578 // get glyph index directly or indirectly
579 int nGlyphIndex
= cChar
- mpImplFontCharMap
->mpRangeCodes
[ nRange
];
580 const int nStartIndex
= mpImplFontCharMap
->mpStartGlyphs
[ nRange
/2 ];
581 if( nStartIndex
>= 0 ) {
582 // the glyph index can be calculated
583 nGlyphIndex
+= nStartIndex
;
585 // the glyphid array has the glyph index
586 nGlyphIndex
= mpImplFontCharMap
->mpGlyphIds
[ nGlyphIndex
- nStartIndex
];
/* vim:set shiftwidth=4 softtabstop=4 expandtab: */