1 //========================================================================
3 // CharCodeToUnicode.cc
5 // Copyright 2001-2002 Glyph & Cog, LLC
7 //========================================================================
10 #pragma implementation
20 #include "GlobalParams.h"
21 #include "PSTokenizer.h"
22 #include "CharCodeToUnicode.h"
24 //------------------------------------------------------------------------
26 #define maxUnicodeString 8
// One entry of the multi-value mapping table (sMap).  u[] holds up to
// maxUnicodeString Unicode values.  Only the u[] member is visible in this
// excerpt; code further down in the file also accesses .c and .len on this
// type.
28 struct CharCodeToUnicodeString
{
30 Unicode u
[maxUnicodeString
];
34 //------------------------------------------------------------------------
// Character-source callback taking an opaque data pointer.  parseCMap()
// passes its address to parseCMap1() together with the address of a cursor
// into the buffer's C string.  The body is not visible in this excerpt.
36 static int getCharFromString(void *data
) {
// Character-source callback for file input: treats data as a FILE* and
// returns the result of fgetc() on it (the next character, or EOF).
50 static int getCharFromFile(void *data
) {
51 return fgetc((FILE *)data
);
54 //------------------------------------------------------------------------
// Builds a CharCodeToUnicode for the character collection named by
// collectionA by reading that collection's cidToUnicode file, one hex value
// per line.  Several statements of this function (initialisation of size and
// mapLenA, the store into mapA, the return paths) are not visible in this
// excerpt.
56 CharCodeToUnicode
*CharCodeToUnicode::parseCIDToUnicode(GString
*collectionA
) {
59 CharCode size
, mapLenA
;
62 CharCodeToUnicode
*ctu
;
// Locate and open the mapping file through the global parameters object;
// report an error when no file is available for this collection.
64 if (!(f
= globalParams
->getCIDToUnicodeFile(collectionA
))) {
65 error(-1, "Couldn't find cidToUnicode file for the '%s' collection",
66 collectionA
->getCString());
// Initial allocation of the table (size entries of type Unicode).
71 mapA
= (Unicode
*)gmalloc(size
* sizeof(Unicode
));
// Read the file line by line.
74 while (getLine(buf
, sizeof(buf
), f
)) {
// Table full: reallocate.  The statement that enlarges size is presumably on
// a line elided from this excerpt.
75 if (mapLenA
== size
) {
77 mapA
= (Unicode
*)grealloc(mapA
, size
* sizeof(Unicode
));
// Each line is expected to start with a hexadecimal number.
79 if (sscanf(buf
, "%x", &u
) == 1) {
// Presumably the failure branch of the sscanf test above (the intervening
// lines are elided): reports the 1-based line number and the collection.
82 error(-1, "Bad line (%d) in cidToUnicode file for the '%s' collection",
83 (int)(mapLenA
+ 1), collectionA
->getCString());
// Wrap the table in a new object, giving it a copy of the collection name
// and passing gTrue as the copyMap argument.  The remaining arguments are cut
// off in this excerpt.
89 ctu
= new CharCodeToUnicode(collectionA
->copy(), mapA
, mapLenA
, gTrue
,
// Creates a mapping for an 8-bit font from the caller's toUnicode table.
// The object is constructed with no collection name, a table length of 256,
// copyMap = gTrue, and no multi-value (sMap) entries (NULL, 0).
95 CharCodeToUnicode
*CharCodeToUnicode::make8BitToUnicode(Unicode
*toUnicode
) {
96 return new CharCodeToUnicode(NULL
, toUnicode
, 256, gTrue
, NULL
, 0);
// Creates a mapping from ToUnicode CMap text held in buf.  nBits is forwarded
// unchanged to parseCMap1().  The return statement is not visible in this
// excerpt.
99 CharCodeToUnicode
*CharCodeToUnicode::parseCMap(GString
*buf
, int nBits
) {
100 CharCodeToUnicode
*ctu
;
// Start from an object constructed with no collection name.
103 ctu
= new CharCodeToUnicode(NULL
);
// p is the read cursor into the buffer's C string; its address is handed to
// the getCharFromString callback as the opaque data argument.
104 p
= buf
->getCString();
105 ctu
->parseCMap1(&getCharFromString
, &p
, nBits
);
// Parses ToUnicode CMap text and merges its mappings into this object.
//   getCharFunc - callback that yields the next input character
//   data        - opaque pointer passed through to getCharFunc
// The rest of the parameter list is cut off in this excerpt; the body uses
// nBits, so it is presumably the third parameter.
//
// Three operators are handled in the visible code: "usecmap", "beginbfchar"
// and "beginbfrange".  Malformed entries are reported through error().
// Many statements (declarations, else branches, loop bodies, cleanup) are
// elided from this excerpt, so the comments below describe only what the
// visible lines show.
109 void CharCodeToUnicode::parseCMap1(int (*getCharFunc
)(void *), void *data
,
112 char tok1
[256], tok2
[256], tok3
[256];
113 int nDigits
, n1
, n2
, n3
;
115 CharCode code1
, code2
;
// Tokenise the input through the supplied character callback.
123 pst
= new PSTokenizer(getCharFunc
, data
);
// Prime tok1 with the first token.  The main loop reads each following token
// into tok2, so inside the loop tok1 is the token that preceded tok2.
124 pst
->getToken(tok1
, sizeof(tok1
), &n1
);
125 while (pst
->getToken(tok2
, sizeof(tok2
), &n2
)) {
// "/Name usecmap": the preceding token names another CMap.  Look up its file
// and parse it recursively into this same object, reading characters with
// getCharFromFile.
126 if (!strcmp(tok2
, "usecmap")) {
127 if (tok1
[0] == '/') {
128 name
= new GString(tok1
+ 1);
129 if ((f
= globalParams
->findToUnicodeFile(name
))) {
130 parseCMap1(&getCharFromFile
, f
, nBits
);
// Reported when the referenced CMap file cannot be found (presumably the else
// branch of the lookup above; the remaining arguments are elided).
133 error(-1, "Couldn't find ToUnicode CMap file for '%s'",
// Read the next token into tok1 before resuming the main loop.
138 pst
->getToken(tok1
, sizeof(tok1
), &n1
);
// "beginbfchar": a list of <srcCode> <dstValue> pairs terminated by
// "endbfchar".
139 } else if (!strcmp(tok2
, "beginbfchar")) {
140 while (pst
->getToken(tok1
, sizeof(tok1
), &n1
)) {
141 if (!strcmp(tok1
, "endbfchar")) {
// The destination token must exist and must not be the terminator.
144 if (!pst
->getToken(tok2
, sizeof(tok2
), &n2
) ||
145 !strcmp(tok2
, "endbfchar")) {
146 error(-1, "Illegal entry in bfchar block in ToUnicode CMap");
// Both tokens must be bracketed by '<' and '>', and the source code must be
// exactly nDigits characters long between the brackets.  (nDigits is set on a
// line not visible here - presumably derived from nBits.)
149 if (!(n1
== 2 + nDigits
&& tok1
[0] == '<' && tok1
[n1
- 1] == '>' &&
150 tok2
[0] == '<' && tok2
[n2
- 1] == '>')) {
151 error(-1, "Illegal entry in bfchar block in ToUnicode CMap");
// Overwrite the closing '>' of each token with a terminator so the hex digits
// can be scanned starting at offset 1.
154 tok1
[n1
- 1] = tok2
[n2
- 1] = '\0';
155 if (sscanf(tok1
+ 1, "%x", &code1
) != 1) {
156 error(-1, "Illegal entry in bfchar block in ToUnicode CMap");
// Grow the direct map so that it covers code1: the new length is the next
// multiple of 256 strictly above code1.  The following loop walks the newly
// added entries (its body is elided).
159 if (code1
>= mapLen
) {
161 mapLen
= (code1
+ 256) & ~255;
162 map
= (Unicode
*)grealloc(map
, mapLen
* sizeof(Unicode
));
163 for (i
= oldLen
; i
< mapLen
; ++i
) {
// Destination parsed as a single hex value.  The condition that selects
// between this path and the sMap path below is not visible in this excerpt.
168 if (sscanf(tok2
+ 1, "%x", &u
) != 1) {
169 error(-1, "Illegal entry in bfchar block in ToUnicode CMap");
// Multi-value destination: append an entry to sMap, reallocating when the
// table is full (the statement enlarging sMapSize is elided).
175 if (sMapLen
== sMapSize
) {
177 sMap
= (CharCodeToUnicodeString
*)
178 grealloc(sMap
, sMapSize
* sizeof(CharCodeToUnicodeString
));
180 sMap
[sMapLen
].c
= code1
;
// One Unicode value per four hex digits of the destination token.
// NOTE(review): len is derived from the token length and is not clamped to
// maxUnicodeString in the visible code; only the fill loop below is bounded.
// Any later code that indexes u[] by len alone could run past the array -
// confirm against the elided lines.
181 sMap
[sMapLen
].len
= (n2
- 2) / 4;
182 for (j
= 0; j
< sMap
[sMapLen
].len
&& j
< maxUnicodeString
; ++j
) {
// Copy the j-th group of four hex digits into uHex and scan it.
183 strncpy(uHex
, tok2
+ 1 + j
*4, 4);
185 if (sscanf(uHex
, "%x", &sMap
[sMapLen
].u
[j
]) != 1) {
186 error(-1, "Illegal entry in bfchar block in ToUnicode CMap");
// Block finished: read the next token into tok1 for the main loop.
192 pst
->getToken(tok1
, sizeof(tok1
), &n1
);
// "beginbfrange": a list of <first> <last> <dstValue> triples terminated by
// "endbfrange".
193 } else if (!strcmp(tok2
, "beginbfrange")) {
194 while (pst
->getToken(tok1
, sizeof(tok1
), &n1
)) {
195 if (!strcmp(tok1
, "endbfrange")) {
// The second and third tokens must both exist and must not be the terminator.
198 if (!pst
->getToken(tok2
, sizeof(tok2
), &n2
) ||
199 !strcmp(tok2
, "endbfrange") ||
200 !pst
->getToken(tok3
, sizeof(tok3
), &n3
) ||
201 !strcmp(tok3
, "endbfrange")) {
202 error(-1, "Illegal entry in bfrange block in ToUnicode CMap");
// All three tokens must be bracketed by '<' and '>'; the two range bounds
// must be exactly nDigits characters long between the brackets.
205 if (!(n1
== 2 + nDigits
&& tok1
[0] == '<' && tok1
[n1
- 1] == '>' &&
206 n2
== 2 + nDigits
&& tok2
[0] == '<' && tok2
[n2
- 1] == '>' &&
207 tok3
[0] == '<' && tok3
[n3
- 1] == '>')) {
208 error(-1, "Illegal entry in bfrange block in ToUnicode CMap");
// Replace each closing '>' with a terminator, then scan the range bounds.
211 tok1
[n1
- 1] = tok2
[n2
- 1] = tok3
[n3
- 1] = '\0';
212 if (sscanf(tok1
+ 1, "%x", &code1
) != 1 ||
213 sscanf(tok2
+ 1, "%x", &code2
) != 1) {
214 error(-1, "Illegal entry in bfrange block in ToUnicode CMap");
// Grow the direct map so that it covers the upper bound code2 (next multiple
// of 256 strictly above code2); the loop walks the new entries (body elided).
217 if (code2
>= mapLen
) {
219 mapLen
= (code2
+ 256) & ~255;
220 map
= (Unicode
*)grealloc(map
, mapLen
* sizeof(Unicode
));
221 for (i
= oldLen
; i
< mapLen
; ++i
) {
// Destination parsed as a single hex value, followed by a walk over every
// code in [code1, code2] (loop body elided).
226 if (sscanf(tok3
+ 1, "%x", &u
) != 1) {
227 error(-1, "Illegal entry in bfrange block in ToUnicode CMap");
230 for (; code1
<= code2
; ++code1
) {
// Multi-value destination: make room in sMap for one entry per code in the
// range, rounding the new capacity up to a multiple of 8.
234 if (sMapLen
+ (int)(code2
- code1
+ 1) > sMapSize
) {
235 sMapSize
= (sMapSize
+ (code2
- code1
+ 1) + 7) & ~7;
236 sMap
= (CharCodeToUnicodeString
*)
237 grealloc(sMap
, sMapSize
* sizeof(CharCodeToUnicodeString
));
// i is the offset of the current code from the start of the range.
239 for (i
= 0; code1
<= code2
; ++code1
, ++i
) {
241 sMap
[sMapLen
].c
= code1
;
// One Unicode value per four hex digits of the destination token.
242 sMap
[sMapLen
].len
= (n3
- 2) / 4;
243 for (j
= 0; j
< sMap
[sMapLen
].len
&& j
< maxUnicodeString
; ++j
) {
244 strncpy(uHex
, tok3
+ 1 + j
*4, 4);
246 if (sscanf(uHex
, "%x", &sMap
[sMapLen
].u
[j
]) != 1) {
247 error(-1, "Illegal entry in bfrange block in ToUnicode CMap");
// Add the range offset to the last Unicode value of the entry.
// NOTE(review): len comes from the token length ((n3 - 2) / 4) and is not
// clamped in the visible code, while u[] has maxUnicodeString elements.  With
// a destination token longer than 4 * maxUnicodeString digits, or shorter
// than 4 digits (len == 0), the index len - 1 falls outside u[].  Confirm
// whether an elided line guards this; if not, clamp len before this write.
250 sMap
[sMapLen
].u
[sMap
[sMapLen
].len
- 1] += i
;
// Block finished: read the next token into tok1 for the main loop.
255 pst
->getToken(tok1
, sizeof(tok1
), &n1
);
// Constructs a mapping with a freshly allocated direct map and an empty
// multi-value table.  collectionA is stored as given, not copied.  The
// statement that sets mapLen and the body of the initialisation loop are not
// visible in this excerpt.
263 CharCodeToUnicode::CharCodeToUnicode(GString
*collectionA
) {
266 collection
= collectionA
;
// Allocate mapLen entries, then walk all of them (loop body elided).
268 map
= (Unicode
*)gmalloc(mapLen
* sizeof(Unicode
));
269 for (i
= 0; i
< mapLen
; ++i
) {
// No multi-value entries yet.
273 sMapLen
= sMapSize
= 0;
// Constructs a mapping from caller-supplied tables.
//   collectionA - collection name, stored as given (may be NULL; see
//                 make8BitToUnicode)
//   mapA        - direct code-to-Unicode table
//   mapLenA     - number of entries in mapA
//   copyMap     - presumably selects the copy of mapA made below; the
//                 test itself is elided from this excerpt
//   sMapA       - multi-value table
// The final parameter is cut off in this excerpt; the body reads sMapLenA, so
// it is presumably the length of sMapA.
277 CharCodeToUnicode::CharCodeToUnicode(GString
*collectionA
, Unicode
*mapA
,
278 CharCode mapLenA
, GBool copyMap
,
279 CharCodeToUnicodeString
*sMapA
,
281 collection
= collectionA
;
// Private copy of the caller's table: allocate mapLen entries and copy them.
284 map
= (Unicode
*)gmalloc(mapLen
* sizeof(Unicode
));
285 memcpy(map
, mapA
, mapLen
* sizeof(Unicode
));
// The multi-value table starts exactly full: length and capacity both equal
// the supplied count.
290 sMapLen
= sMapSize
= sMapLenA
;
// Destructor.  The body is not visible in this excerpt.
294 CharCodeToUnicode::~CharCodeToUnicode() {
// Called by CIDToUnicodeCache::getCIDToUnicode() on a cache hit at slot 0.
// Presumably increments a reference count; the body is not visible in this
// excerpt.
304 void CharCodeToUnicode::incRefCnt() {
// Called by CIDToUnicodeCache when an entry is evicted and when the cache is
// destroyed.  Presumably decrements a reference count; the body is not
// visible in this excerpt.
308 void CharCodeToUnicode::decRefCnt() {
// Tests whether this mapping belongs to the collection named collectionA.
// The result is true only when this object has a collection name and cmp()
// against collectionA returns zero.  Objects built without a collection name
// never match.
314 GBool
CharCodeToUnicode::match(GString
*collectionA
) {
315 return collection
&& !collection
->cmp(collectionA
);
// Looks up character code c; u and size are the caller's output buffer and
// its capacity (assumed from their use as the bound in the inner loop - the
// stores into u are not visible in this excerpt).  The direct-map lookup and
// the return statements are also elided.
318 int CharCodeToUnicode::mapToUnicode(CharCode c
, Unicode
*u
, int size
) {
// Linear search of the multi-value table for an entry whose code equals c.
328 for (i
= 0; i
< sMapLen
; ++i
) {
329 if (sMap
[i
].c
== c
) {
// Walk the entry's values, stopping at the caller's capacity.
// NOTE(review): the bound uses the entry's len and size but not
// maxUnicodeString; if len can exceed the u[] array length (see the len
// assignments in parseCMap1), the loop body would index past the entry's
// array - confirm.
330 for (j
= 0; j
< sMap
[i
].len
&& j
< size
; ++j
) {
339 //------------------------------------------------------------------------
// Constructs the cache by walking all cidToUnicodeCacheSize slots.  The loop
// body is not visible in this excerpt; presumably it clears each slot.
341 CIDToUnicodeCache::CIDToUnicodeCache() {
344 for (i
= 0; i
< cidToUnicodeCacheSize
; ++i
) {
// Destroys the cache, calling decRefCnt() on cached mappings.  The line
// between the loop header and the call is elided; presumably it skips empty
// slots.
349 CIDToUnicodeCache::~CIDToUnicodeCache() {
352 for (i
= 0; i
< cidToUnicodeCacheSize
; ++i
) {
354 cache
[i
]->decRefCnt();
// Returns the mapping for the named collection, consulting the cache first
// and parsing the collection's cidToUnicode file on a miss.  The return
// statements and the stores into cache[0] are not visible in this excerpt.
359 CharCodeToUnicode
*CIDToUnicodeCache::getCIDToUnicode(GString
*collection
) {
360 CharCodeToUnicode
*ctu
;
// Hit in the front slot: call incRefCnt() on the cached mapping.
363 if (cache
[0] && cache
[0]->match(collection
)) {
364 cache
[0]->incRefCnt();
// Search the remaining slots.  On a hit at slot i, entries 0..i-1 are each
// moved one slot toward the back, which frees slot 0 (presumably for the
// entry that was found).
367 for (i
= 1; i
< cidToUnicodeCacheSize
; ++i
) {
368 if (cache
[i
] && cache
[i
]->match(collection
)) {
370 for (j
= i
; j
>= 1; --j
) {
371 cache
[j
] = cache
[j
- 1];
// Miss: parse the mapping from its file.  If that succeeds, call decRefCnt()
// on the entry in the last slot (if any) and shift every other entry one
// slot toward the back.
378 if ((ctu
= CharCodeToUnicode::parseCIDToUnicode(collection
))) {
379 if (cache
[cidToUnicodeCacheSize
- 1]) {
380 cache
[cidToUnicodeCacheSize
- 1]->decRefCnt();
382 for (j
= cidToUnicodeCacheSize
- 1; j
>= 1; --j
) {
383 cache
[j
] = cache
[j
- 1];