1 //========================================================================
3 // CharCodeToUnicode.cc
5 // Copyright 2001-2003 Glyph & Cog, LLC
7 //========================================================================
11 #ifdef USE_GCC_PRAGMAS
12 #pragma implementation
21 #include "GlobalParams.h"
22 #include "PSTokenizer.h"
23 #include "CharCodeToUnicode.h"
25 //------------------------------------------------------------------------
27 #define maxUnicodeString 8
// One multi-character mapping entry: a fixed-capacity buffer of up to
// maxUnicodeString Unicode values.
// NOTE(review): only the `u` member is visible in this view; other code in
// this file also reads and writes `.c` and `.len` on this type, so those
// members are presumably declared on lines not shown here -- confirm.
29 struct CharCodeToUnicodeString
{
31 Unicode u
[maxUnicodeString
];
35 //------------------------------------------------------------------------
// Character-source callback with the int(void *) shape that parseCMap1
// expects. parseCMap and mergeCMap pass it together with the address of a
// char pointer into the CMap text buffer.
// NOTE(review): the body is not visible in this view.
37 static int getCharFromString(void *data
) {
// Character-source callback for file input: treats `data` as a FILE * and
// returns the result of fgetc() on it (the next character, or EOF).
51 static int getCharFromFile(void *data
) {
52 return fgetc((FILE *)data
);
55 //------------------------------------------------------------------------
// Builds a CharCodeToUnicode from a cidToUnicode text file.
//   fileName   - path of the file to read (opened with mode "r").
//   collection - its copy() is passed as the first argument of the resulting
//                object's constructor.
// The file is read line by line with getLine(); each line is scanned as one
// hexadecimal value. The table `mapA` is grown with grealloc() whenever the
// number of entries read (mapLenA) reaches the current capacity (size).
// Problems are reported through error().
// NOTE(review): several lines of this function (local declarations, the
// capacity update, the return paths) are not visible in this view.
57 CharCodeToUnicode
*CharCodeToUnicode::parseCIDToUnicode(GString
*fileName
,
58 GString
*collection
) {
61 CharCode size
, mapLenA
;
64 CharCodeToUnicode
*ctu
;
// Open the mapping file; failure is reported via error().
66 if (!(f
= fopen(fileName
->getCString(), "r"))) {
67 error(-1, "Couldn't open cidToUnicode file '%s'",
68 fileName
->getCString());
// Initial table allocation.
// NOTE(review): `size * sizeof(Unicode)` is computed without an overflow
// check here and in the grealloc() below -- confirm `size` stays bounded.
73 mapA
= (Unicode
*)gmalloc(size
* sizeof(Unicode
));
// One table entry per input line.
76 while (getLine(buf
, sizeof(buf
), f
)) {
// Table full: enlarge it before storing the next entry.
77 if (mapLenA
== size
) {
79 mapA
= (Unicode
*)grealloc(mapA
, size
* sizeof(Unicode
));
// A line is accepted when it scans as exactly one hex number.
81 if (sscanf(buf
, "%x", &u
) == 1) {
// The reported line number is 1-based (mapLenA + 1).
84 error(-1, "Bad line (%d) in cidToUnicode file '%s'",
85 (int)(mapLenA
+ 1), fileName
->getCString());
// Wrap the table in a CharCodeToUnicode; gTrue is passed in the position of
// the constructor's copyMap parameter.
92 ctu
= new CharCodeToUnicode(collection
->copy(), mapA
, mapLenA
, gTrue
,
// Builds a CharCodeToUnicode from a unicodeToUnicode text file.
// Each line is split on whitespace (" \t\r\n") with strtok(): the first token
// is scanned as a hex source value (u0), and up to maxUnicodeString further
// tokens are scanned as hex values into uBuf. The single-value table `mapA`
// is kept zero-filled as it grows. Multi-value entries are appended to
// `sMapA` with c = u0, the n values copied into u[], and len = n.
// Malformed lines are reported through error() with the line number.
// NOTE(review): the parameter list and a number of statements (the decision
// between the single-value and multi-value paths, capacity updates, return
// paths) are not visible in this view.
98 CharCodeToUnicode
*CharCodeToUnicode::parseUnicodeToUnicode(
102 CharCodeToUnicodeString
*sMapA
;
103 CharCode size
, oldSize
, len
, sMapSizeA
, sMapLenA
;
// Scratch buffer for the target values parsed from one line.
107 Unicode uBuf
[maxUnicodeString
];
108 CharCodeToUnicode
*ctu
;
// Open the mapping file; failure is reported via error().
111 if (!(f
= fopen(fileName
->getCString(), "r"))) {
112 error(-1, "Couldn't open unicodeToUnicode file '%s'",
113 fileName
->getCString());
// Allocate the single-value table and clear it to zero.
118 mapA
= (Unicode
*)gmalloc(size
* sizeof(Unicode
));
119 memset(mapA
, 0, size
* sizeof(Unicode
));
// The multi-value table starts out empty.
122 sMapSizeA
= sMapLenA
= 0;
125 while (getLine(buf
, sizeof(buf
), f
)) {
// First token on the line: the source value, in hex.
127 if (!(tok
= strtok(buf
, " \t\r\n")) ||
128 sscanf(tok
, "%x", &u0
) != 1) {
129 error(-1, "Bad line (%d) in unicodeToUnicode file '%s'",
130 line
, fileName
->getCString());
// Remaining tokens: at most maxUnicodeString target values.
134 while (n
< maxUnicodeString
) {
// strtok(NULL, ...) continues tokenizing the same line.
135 if (!(tok
= strtok(NULL
, " \t\r\n"))) {
138 if (sscanf(tok
, "%x", &uBuf
[n
]) != 1) {
139 error(-1, "Bad line (%d) in unicodeToUnicode file '%s'",
140 line
, fileName
->getCString());
146 error(-1, "Bad line (%d) in unicodeToUnicode file '%s'",
147 line
, fileName
->getCString());
// Grow the single-value table and zero only the newly added tail.
155 mapA
= (Unicode
*)grealloc(mapA
, size
* sizeof(Unicode
));
156 memset(mapA
+ oldSize
, 0, (size
- oldSize
) * sizeof(Unicode
));
// Multi-value table full: enlarge it before appending.
162 if (sMapLenA
== sMapSizeA
) {
164 sMapA
= (CharCodeToUnicodeString
*)
165 grealloc(sMapA
, sMapSizeA
* sizeof(CharCodeToUnicodeString
));
// Append the entry: source value, the n target values, and their count.
167 sMapA
[sMapLenA
].c
= u0
;
168 for (i
= 0; i
< n
; ++i
) {
169 sMapA
[sMapLenA
].u
[i
] = uBuf
[i
];
171 sMapA
[sMapLenA
].len
= n
;
// The resulting object is constructed with a copy of the file name as its
// first argument and receives both tables.
180 ctu
= new CharCodeToUnicode(fileName
->copy(), mapA
, len
, gTrue
,
181 sMapA
, sMapLenA
, sMapSizeA
);
// Creates a CharCodeToUnicode from the caller-supplied `toUnicode` table,
// declaring its length as 256 entries. The object is constructed with a NULL
// tag and no multi-character mappings (NULL, 0, 0); gTrue is passed in the
// position of the constructor's copyMap parameter.
186 CharCodeToUnicode
*CharCodeToUnicode::make8BitToUnicode(Unicode
*toUnicode
) {
187 return new CharCodeToUnicode(NULL
, toUnicode
, 256, gTrue
, NULL
, 0, 0);
// Creates a CharCodeToUnicode by parsing ToUnicode CMap text held in `buf`.
//   buf   - CMap text; parsing reads from its C string.
//   nBits - forwarded unchanged to parseCMap1.
// The new object is constructed with a NULL tag.
// NOTE(review): the declaration of `p` and the return statement are not
// visible in this view.
190 CharCodeToUnicode
*CharCodeToUnicode::parseCMap(GString
*buf
, int nBits
) {
191 CharCodeToUnicode
*ctu
;
194 ctu
= new CharCodeToUnicode(NULL
);
// parseCMap1 is given the address of `p`, not `p` itself.
195 p
= buf
->getCString();
196 ctu
->parseCMap1(&getCharFromString
, &p
, nBits
);
// Parses ToUnicode CMap text from `buf` into this existing object by running
// parseCMap1 on it, with the same reader callback that parseCMap uses.
//   buf   - CMap text; parsing reads from its C string.
//   nBits - forwarded unchanged to parseCMap1.
200 void CharCodeToUnicode::mergeCMap(GString
*buf
, int nBits
) {
203 p
= buf
->getCString();
204 parseCMap1(&getCharFromString
, &p
, nBits
);
// Core ToUnicode CMap parser shared by parseCMap and mergeCMap.
// Pulls tokens from a PSTokenizer built over (getCharFunc, data) and handles
// the three constructs visible in this view:
//   - "usecmap": looks up the named ToUnicode CMap file through
//     globalParams->findToUnicodeFile() and parses it with a recursive call;
//   - "beginbfchar" ... "endbfchar": <code> <unicode> pairs;
//   - "beginbfrange" ... "endbfrange": <lo> <hi> followed by either a
//     "[" ... "]" list of <unicode> strings or a single <unicode> start value.
// Malformed entries are reported through error().
// NOTE(review): many lines of this function (the rest of the parameter list,
// how nDigits is derived, loop exits, cleanup) are not visible in this view.
207 void CharCodeToUnicode::parseCMap1(int (*getCharFunc
)(void *), void *data
,
// Three token buffers: tok1/tok2 act as a sliding pair, tok3 is used only in
// bfrange entries.
210 char tok1
[256], tok2
[256], tok3
[256];
211 int nDigits
, n1
, n2
, n3
;
213 CharCode code1
, code2
;
218 pst
= new PSTokenizer(getCharFunc
, data
);
// Prime tok1, then keep reading the following token into tok2.
219 pst
->getToken(tok1
, sizeof(tok1
), &n1
);
220 while (pst
->getToken(tok2
, sizeof(tok2
), &n2
)) {
// "/Name usecmap": tok1 is the preceding token and must be a name.
221 if (!strcmp(tok2
, "usecmap")) {
222 if (tok1
[0] == '/') {
// Skip the leading '/' of the name.
223 name
= new GString(tok1
+ 1);
224 if ((f
= globalParams
->findToUnicodeFile(name
))) {
// Recurse, reading characters from the file that was found.
225 parseCMap1(&getCharFromFile
, f
, nBits
);
228 error(-1, "Couldn't find ToUnicode CMap file for '%s'",
233 pst
->getToken(tok1
, sizeof(tok1
), &n1
);
// bfchar block: pairs of <srcCode> <dstUnicode>.
234 } else if (!strcmp(tok2
, "beginbfchar")) {
235 while (pst
->getToken(tok1
, sizeof(tok1
), &n1
)) {
236 if (!strcmp(tok1
, "endbfchar")) {
// A source code with no destination token is an error.
239 if (!pst
->getToken(tok2
, sizeof(tok2
), &n2
) ||
240 !strcmp(tok2
, "endbfchar")) {
241 error(-1, "Illegal entry in bfchar block in ToUnicode CMap");
// The source must be <...> with exactly nDigits characters inside; the
// destination must be <...> of any length.
244 if (!(n1
== 2 + nDigits
&& tok1
[0] == '<' && tok1
[n1
- 1] == '>' &&
245 tok2
[0] == '<' && tok2
[n2
- 1] == '>')) {
246 error(-1, "Illegal entry in bfchar block in ToUnicode CMap");
// Overwrite the closing '>' so the hex digits are NUL-terminated.
249 tok1
[n1
- 1] = tok2
[n2
- 1] = '\0';
250 if (sscanf(tok1
+ 1, "%x", &code1
) != 1) {
251 error(-1, "Illegal entry in bfchar block in ToUnicode CMap");
// NOTE(review): the length passed here is n2 - 1, whereas both bfrange paths
// below pass n - 2 (token length minus both delimiters) -- confirm which is
// intended, since addMapping derives the character count from this value.
254 addMapping(code1
, tok2
+ 1, n2
- 1, 0);
256 pst
->getToken(tok1
, sizeof(tok1
), &n1
);
// bfrange block: triples of <lo> <hi> and a destination.
257 } else if (!strcmp(tok2
, "beginbfrange")) {
258 while (pst
->getToken(tok1
, sizeof(tok1
), &n1
)) {
259 if (!strcmp(tok1
, "endbfrange")) {
// The block ending before the triple is complete is an error.
262 if (!pst
->getToken(tok2
, sizeof(tok2
), &n2
) ||
263 !strcmp(tok2
, "endbfrange") ||
264 !pst
->getToken(tok3
, sizeof(tok3
), &n3
) ||
265 !strcmp(tok3
, "endbfrange")) {
266 error(-1, "Illegal entry in bfrange block in ToUnicode CMap");
// Both range bounds must be <...> with exactly nDigits characters inside.
269 if (!(n1
== 2 + nDigits
&& tok1
[0] == '<' && tok1
[n1
- 1] == '>' &&
270 n2
== 2 + nDigits
&& tok2
[0] == '<' && tok2
[n2
- 1] == '>')) {
271 error(-1, "Illegal entry in bfrange block in ToUnicode CMap");
// Overwrite the closing '>' so the hex digits are NUL-terminated.
274 tok1
[n1
- 1] = tok2
[n2
- 1] = '\0';
275 if (sscanf(tok1
+ 1, "%x", &code1
) != 1 ||
276 sscanf(tok2
+ 1, "%x", &code2
) != 1) {
277 error(-1, "Illegal entry in bfrange block in ToUnicode CMap");
// Array form: one explicit destination string per code in the range.
280 if (!strcmp(tok3
, "[")) {
// Continue while tokens remain and code1 + i has not passed code2.
282 while (pst
->getToken(tok1
, sizeof(tok1
), &n1
) &&
283 code1
+ i
<= code2
) {
284 if (!strcmp(tok1
, "]")) {
287 if (tok1
[0] == '<' && tok1
[n1
- 1] == '>') {
// Each list element maps code1 + i with offset 0.
289 addMapping(code1
+ i
, tok1
+ 1, n1
- 2, 0);
291 error(-1, "Illegal entry in bfrange block in ToUnicode CMap");
// Single-start form: the same destination string is passed for every code in
// the range, with i as the offset argument.
295 } else if (tok3
[0] == '<' && tok3
[n3
- 1] == '>') {
297 for (i
= 0; code1
<= code2
; ++code1
, ++i
) {
298 addMapping(code1
, tok3
+ 1, n3
- 2, i
);
302 error(-1, "Illegal entry in bfrange block in ToUnicode CMap");
305 pst
->getToken(tok1
, sizeof(tok1
), &n1
);
// Records a mapping for character code `code` from the hex string `uStr`.
//   code - character code being mapped.
//   uStr - hexadecimal text of the destination value(s).
//   n    - length used to derive the number of 4-hex-digit units (n / 4).
// An `offset` value is also used below; it is presumably the final
// parameter, which is not visible in this view.
// Two storage paths are visible: a single value stored in map[code] as
// (parsed value + offset), and a multi-value entry appended to sMap whose
// last unit has offset added.
// NOTE(review): the condition that selects between the two paths, and what
// is written to the newly grown part of `map`, are not visible in this view.
313 void CharCodeToUnicode::addMapping(CharCode code
, char *uStr
, int n
,
// Grow `map` so that `code` becomes a valid index; the new length is
// (code + 256) with the low 8 bits cleared, i.e. a multiple of 256.
320 if (code
>= mapLen
) {
322 mapLen
= (code
+ 256) & ~255;
323 map
= (Unicode
*)grealloc(map
, mapLen
* sizeof(Unicode
));
// Visit the entries added by the growth (loop body not visible here).
324 for (i
= oldLen
; i
< mapLen
; ++i
) {
// Single-value path: parse the whole string as one hex number.
329 if (sscanf(uStr
, "%x", &u
) != 1) {
330 error(-1, "Illegal entry in ToUnicode CMap");
333 map
[code
] = u
+ offset
;
// Multi-value path: sMap grows in steps of 16 entries.
335 if (sMapLen
>= sMapSize
) {
336 sMapSize
= sMapSize
+ 16;
337 sMap
= (CharCodeToUnicodeString
*)
338 grealloc(sMap
, sMapSize
* sizeof(CharCodeToUnicodeString
));
341 sMap
[sMapLen
].c
= code
;
// Four hex digits per Unicode value.
342 sMap
[sMapLen
].len
= n
/ 4;
// The copy is capped at maxUnicodeString values.
343 for (j
= 0; j
< sMap
[sMapLen
].len
&& j
< maxUnicodeString
; ++j
) {
// Take the j-th group of 4 hex digits.
344 strncpy(uHex
, uStr
+ j
*4, 4);
346 if (sscanf(uHex
, "%x", &sMap
[sMapLen
].u
[j
]) != 1) {
347 error(-1, "Illegal entry in ToUnicode CMap");
// NOTE(review): `len` is not clamped to maxUnicodeString in the visible
// lines, so u[len - 1] can index past the array when n / 4 exceeds
// maxUnicodeString -- confirm whether a clamp exists on a line not shown.
350 sMap
[sMapLen
].u
[sMap
[sMapLen
].len
- 1] += offset
;
// Constructor taking only a tag (`tagA`): allocates a `map` table of mapLen
// entries, loops over every entry (loop body not visible in this view), and
// starts with an empty multi-value table (sMapLen = sMapSize = 0).
// NOTE(review): the initial value of mapLen and the handling of tagA are not
// visible in this view.
355 CharCodeToUnicode::CharCodeToUnicode(GString
*tagA
) {
360 map
= (Unicode
*)gmalloc(mapLen
* sizeof(Unicode
));
361 for (i
= 0; i
< mapLen
; ++i
) {
365 sMapLen
= sMapSize
= 0;
// Constructor taking pre-built tables.
//   tagA      - tag for the new object (its handling is not visible here).
//   mapA      - single-value table.
//   mapLenA   - length of mapA (presumably assigned to mapLen -- confirm).
//   copyMap   - presumably selects between copying mapA and adopting the
//               pointer; only the copying statements are visible here.
//   sMapA, sMapLenA, sMapSizeA - multi-value table, its used length and its
//               allocated capacity.
372 CharCodeToUnicode::CharCodeToUnicode(GString
*tagA
, Unicode
*mapA
,
373 CharCode mapLenA
, GBool copyMap
,
374 CharCodeToUnicodeString
*sMapA
,
375 int sMapLenA
, int sMapSizeA
) {
// Copy path: allocate a private table and duplicate mapLen entries of mapA.
379 map
= (Unicode
*)gmalloc(mapLen
* sizeof(Unicode
));
380 memcpy(map
, mapA
, mapLen
* sizeof(Unicode
));
386 sMapSize
= sMapSizeA
;
// Destructor. The only statement visible in this view destroys the object's
// mutex; any other cleanup is on lines not shown here.
393 CharCodeToUnicode::~CharCodeToUnicode() {
402 gDestroyMutex(&mutex
);
// Takes an additional reference to this object.
// NOTE(review): only the mutex release is visible in this view; presumably
// refCnt is incremented while the mutex is held -- confirm.
406 void CharCodeToUnicode::incRefCnt() {
412 gUnlockMutex(&mutex
);
// Drops one reference to this object.
// `done` records whether the decrement brought refCnt to zero; the mutex is
// released afterwards.
// NOTE(review): what happens when `done` is true is not visible in this view.
416 void CharCodeToUnicode::decRefCnt() {
422 done
= --refCnt
== 0;
424 gUnlockMutex(&mutex
);
// Returns true when this object has a non-null tag and tag->cmp(tagA)
// returns 0; an object with a null tag never matches.
431 GBool
CharCodeToUnicode::match(GString
*tagA
) {
432 return tag
&& !tag
->cmp(tagA
);
// Sets the mapping for character code `c` to the `len` Unicode values in `u`.
// The visible lines cover the multi-value path: sMap is grown when full, the
// entry's len is set to `len`, and at most maxUnicodeString values are copied.
// NOTE(review): the stored `len` is not clamped although the copy loop is, so
// an entry may claim more values than were written -- confirm against the
// readers. The single-value path and the assignment of `.c` are not visible
// in this view.
435 void CharCodeToUnicode::setMapping(CharCode c
, Unicode
*u
, int len
) {
// Multi-value table full: enlarge it before appending.
442 if (sMapLen
== sMapSize
) {
444 sMap
= (CharCodeToUnicodeString
*)
445 grealloc(sMap
, sMapSize
* sizeof(CharCodeToUnicodeString
));
448 sMap
[sMapLen
].len
= len
;
// The copy is capped at the entry's capacity.
449 for (i
= 0; i
< len
&& i
< maxUnicodeString
; ++i
) {
450 sMap
[sMapLen
].u
[i
] = u
[i
];
// Looks up the Unicode value(s) for character code `c`.
//   u    - output buffer supplied by the caller.
//   size - capacity of `u`; the visible inner loop stops at this bound.
// The visible lines scan sMap linearly for an entry whose `.c` equals `c` and
// then iterate over at most min(entry.len, size) of its values.
// NOTE(review): the single-value lookup, the inner loop body and the return
// values are not visible in this view.
456 int CharCodeToUnicode::mapToUnicode(CharCode c
, Unicode
*u
, int size
) {
466 for (i
= 0; i
< sMapLen
; ++i
) {
467 if (sMap
[i
].c
== c
) {
468 for (j
= 0; j
< sMap
[i
].len
&& j
< size
; ++j
) {
477 //------------------------------------------------------------------------
// Creates a cache with room for `size` CharCodeToUnicode pointers and then
// visits every slot (loop body not visible in this view).
// NOTE(review): `size` is presumably set from sizeA on a line not shown here
// -- confirm.
479 CharCodeToUnicodeCache::CharCodeToUnicodeCache(int sizeA
) {
483 cache
= (CharCodeToUnicode
**)gmalloc(size
* sizeof(CharCodeToUnicode
*));
484 for (i
= 0; i
< size
; ++i
) {
// Destructor: walks all cache slots and calls decRefCnt() on cached entries.
// NOTE(review): any null check guarding the call, and the release of the
// `cache` array itself, are not visible in this view.
489 CharCodeToUnicodeCache::~CharCodeToUnicodeCache() {
492 for (i
= 0; i
< size
; ++i
) {
494 cache
[i
]->decRefCnt();
// Looks up a cached CharCodeToUnicode whose tag matches `tag`.
// Slot 0 is checked first and, on a hit, gets an extra reference. Otherwise
// slots 1..size-1 are searched; on a hit at slot i, the entries in slots
// 0..i-1 are shifted down by one position.
// NOTE(review): the statements that place the hit into slot 0, take its
// reference, and return (including the miss case) are not visible in this
// view.
500 CharCodeToUnicode
*CharCodeToUnicodeCache::getCharCodeToUnicode(GString
*tag
) {
501 CharCodeToUnicode
*ctu
;
// Check the first slot before scanning the rest.
504 if (cache
[0] && cache
[0]->match(tag
)) {
505 cache
[0]->incRefCnt();
508 for (i
= 1; i
< size
; ++i
) {
509 if (cache
[i
] && cache
[i
]->match(tag
)) {
// Shift slots 0..i-1 down by one, overwriting slot i.
511 for (j
= i
; j
>= 1; --j
) {
512 cache
[j
] = cache
[j
- 1];
522 void CharCodeToUnicodeCache::add(CharCodeToUnicode
*ctu
) {
525 if (cache
[size
- 1]) {
526 cache
[size
- 1]->decRefCnt();
528 for (i
= size
- 1; i
>= 1; --i
) {
529 cache
[i
] = cache
[i
- 1];