upgrade to xpdf 3.00.
[swftools.git] / pdf2swf / xpdf / CharCodeToUnicode.cc
blob2e2ad478905bb1b8f36b4c440f6990082929f3e9
1 //========================================================================
2 //
3 // CharCodeToUnicode.cc
4 //
5 // Copyright 2001-2003 Glyph & Cog, LLC
6 //
7 //========================================================================
9 #include <aconf.h>
11 #ifdef USE_GCC_PRAGMAS
12 #pragma implementation
13 #endif
15 #include <stdio.h>
16 #include <string.h>
17 #include "gmem.h"
18 #include "gfile.h"
19 #include "GString.h"
20 #include "Error.h"
21 #include "GlobalParams.h"
22 #include "PSTokenizer.h"
23 #include "CharCodeToUnicode.h"
25 //------------------------------------------------------------------------
27 #define maxUnicodeString 8
29 struct CharCodeToUnicodeString {
30 CharCode c;
31 Unicode u[maxUnicodeString];
32 int len;
35 //------------------------------------------------------------------------
// Character source that reads from an in-memory, NUL-terminated string.
// [data] points to a char* cursor; the cursor is advanced past every
// character handed out.  Returns the next character as a value in
// 0..255, or EOF once the cursor sits on the terminating NUL (the
// cursor is not moved in that case).
static int getCharFromString(void *data) {
  char **cursor = (char **)data;
  char *p = *cursor;

  if (!*p) {
    return EOF;
  }
  // Go through unsigned char, the way fgetc() does: where plain char is
  // signed, byte 0xff would otherwise be returned as -1 and be
  // indistinguishable from EOF, and other high bytes would be negative.
  int c = (unsigned char)*p++;
  *cursor = p;
  return c;
}
// Character source that reads from a stdio stream.  [data] is the
// FILE* to read from; the result is whatever fgetc() returns for it.
static int getCharFromFile(void *data) {
  FILE *stream = (FILE *)data;
  return fgetc(stream);
}
55 //------------------------------------------------------------------------
57 CharCodeToUnicode *CharCodeToUnicode::parseCIDToUnicode(GString *fileName,
58 GString *collection) {
59 FILE *f;
60 Unicode *mapA;
61 CharCode size, mapLenA;
62 char buf[64];
63 Unicode u;
64 CharCodeToUnicode *ctu;
66 if (!(f = fopen(fileName->getCString(), "r"))) {
67 error(-1, "Couldn't open cidToUnicode file '%s'",
68 fileName->getCString());
69 return NULL;
72 size = 32768;
73 mapA = (Unicode *)gmalloc(size * sizeof(Unicode));
74 mapLenA = 0;
76 while (getLine(buf, sizeof(buf), f)) {
77 if (mapLenA == size) {
78 size *= 2;
79 mapA = (Unicode *)grealloc(mapA, size * sizeof(Unicode));
81 if (sscanf(buf, "%x", &u) == 1) {
82 mapA[mapLenA] = u;
83 } else {
84 error(-1, "Bad line (%d) in cidToUnicode file '%s'",
85 (int)(mapLenA + 1), fileName->getCString());
86 mapA[mapLenA] = 0;
88 ++mapLenA;
90 fclose(f);
92 ctu = new CharCodeToUnicode(collection->copy(), mapA, mapLenA, gTrue,
93 NULL, 0, 0);
94 gfree(mapA);
95 return ctu;
98 CharCodeToUnicode *CharCodeToUnicode::parseUnicodeToUnicode(
99 GString *fileName) {
100 FILE *f;
101 Unicode *mapA;
102 CharCodeToUnicodeString *sMapA;
103 CharCode size, oldSize, len, sMapSizeA, sMapLenA;
104 char buf[256];
105 char *tok;
106 Unicode u0;
107 Unicode uBuf[maxUnicodeString];
108 CharCodeToUnicode *ctu;
109 int line, n, i;
111 if (!(f = fopen(fileName->getCString(), "r"))) {
112 error(-1, "Couldn't open unicodeToUnicode file '%s'",
113 fileName->getCString());
114 return NULL;
117 size = 4096;
118 mapA = (Unicode *)gmalloc(size * sizeof(Unicode));
119 memset(mapA, 0, size * sizeof(Unicode));
120 len = 0;
121 sMapA = NULL;
122 sMapSizeA = sMapLenA = 0;
124 line = 0;
125 while (getLine(buf, sizeof(buf), f)) {
126 ++line;
127 if (!(tok = strtok(buf, " \t\r\n")) ||
128 sscanf(tok, "%x", &u0) != 1) {
129 error(-1, "Bad line (%d) in unicodeToUnicode file '%s'",
130 line, fileName->getCString());
131 continue;
133 n = 0;
134 while (n < maxUnicodeString) {
135 if (!(tok = strtok(NULL, " \t\r\n"))) {
136 break;
138 if (sscanf(tok, "%x", &uBuf[n]) != 1) {
139 error(-1, "Bad line (%d) in unicodeToUnicode file '%s'",
140 line, fileName->getCString());
141 break;
143 ++n;
145 if (n < 1) {
146 error(-1, "Bad line (%d) in unicodeToUnicode file '%s'",
147 line, fileName->getCString());
148 continue;
150 if (u0 >= size) {
151 oldSize = size;
152 while (u0 >= size) {
153 size *= 2;
155 mapA = (Unicode *)grealloc(mapA, size * sizeof(Unicode));
156 memset(mapA + oldSize, 0, (size - oldSize) * sizeof(Unicode));
158 if (n == 1) {
159 mapA[u0] = uBuf[0];
160 } else {
161 mapA[u0] = 0;
162 if (sMapLenA == sMapSizeA) {
163 sMapSizeA += 16;
164 sMapA = (CharCodeToUnicodeString *)
165 grealloc(sMapA, sMapSizeA * sizeof(CharCodeToUnicodeString));
167 sMapA[sMapLenA].c = u0;
168 for (i = 0; i < n; ++i) {
169 sMapA[sMapLenA].u[i] = uBuf[i];
171 sMapA[sMapLenA].len = n;
172 ++sMapLenA;
174 if (u0 >= len) {
175 len = u0 + 1;
178 fclose(f);
180 ctu = new CharCodeToUnicode(fileName->copy(), mapA, len, gTrue,
181 sMapA, sMapLenA, sMapSizeA);
182 gfree(mapA);
183 return ctu;
186 CharCodeToUnicode *CharCodeToUnicode::make8BitToUnicode(Unicode *toUnicode) {
187 return new CharCodeToUnicode(NULL, toUnicode, 256, gTrue, NULL, 0, 0);
190 CharCodeToUnicode *CharCodeToUnicode::parseCMap(GString *buf, int nBits) {
191 CharCodeToUnicode *ctu;
192 char *p;
194 ctu = new CharCodeToUnicode(NULL);
195 p = buf->getCString();
196 ctu->parseCMap1(&getCharFromString, &p, nBits);
197 return ctu;
200 void CharCodeToUnicode::mergeCMap(GString *buf, int nBits) {
201 char *p;
203 p = buf->getCString();
204 parseCMap1(&getCharFromString, &p, nBits);
207 void CharCodeToUnicode::parseCMap1(int (*getCharFunc)(void *), void *data,
208 int nBits) {
209 PSTokenizer *pst;
210 char tok1[256], tok2[256], tok3[256];
211 int nDigits, n1, n2, n3;
212 CharCode i;
213 CharCode code1, code2;
214 GString *name;
215 FILE *f;
217 nDigits = nBits / 4;
218 pst = new PSTokenizer(getCharFunc, data);
219 pst->getToken(tok1, sizeof(tok1), &n1);
220 while (pst->getToken(tok2, sizeof(tok2), &n2)) {
221 if (!strcmp(tok2, "usecmap")) {
222 if (tok1[0] == '/') {
223 name = new GString(tok1 + 1);
224 if ((f = globalParams->findToUnicodeFile(name))) {
225 parseCMap1(&getCharFromFile, f, nBits);
226 fclose(f);
227 } else {
228 error(-1, "Couldn't find ToUnicode CMap file for '%s'",
229 name->getCString());
231 delete name;
233 pst->getToken(tok1, sizeof(tok1), &n1);
234 } else if (!strcmp(tok2, "beginbfchar")) {
235 while (pst->getToken(tok1, sizeof(tok1), &n1)) {
236 if (!strcmp(tok1, "endbfchar")) {
237 break;
239 if (!pst->getToken(tok2, sizeof(tok2), &n2) ||
240 !strcmp(tok2, "endbfchar")) {
241 error(-1, "Illegal entry in bfchar block in ToUnicode CMap");
242 break;
244 if (!(n1 == 2 + nDigits && tok1[0] == '<' && tok1[n1 - 1] == '>' &&
245 tok2[0] == '<' && tok2[n2 - 1] == '>')) {
246 error(-1, "Illegal entry in bfchar block in ToUnicode CMap");
247 continue;
249 tok1[n1 - 1] = tok2[n2 - 1] = '\0';
250 if (sscanf(tok1 + 1, "%x", &code1) != 1) {
251 error(-1, "Illegal entry in bfchar block in ToUnicode CMap");
252 continue;
254 addMapping(code1, tok2 + 1, n2 - 1, 0);
256 pst->getToken(tok1, sizeof(tok1), &n1);
257 } else if (!strcmp(tok2, "beginbfrange")) {
258 while (pst->getToken(tok1, sizeof(tok1), &n1)) {
259 if (!strcmp(tok1, "endbfrange")) {
260 break;
262 if (!pst->getToken(tok2, sizeof(tok2), &n2) ||
263 !strcmp(tok2, "endbfrange") ||
264 !pst->getToken(tok3, sizeof(tok3), &n3) ||
265 !strcmp(tok3, "endbfrange")) {
266 error(-1, "Illegal entry in bfrange block in ToUnicode CMap");
267 break;
269 if (!(n1 == 2 + nDigits && tok1[0] == '<' && tok1[n1 - 1] == '>' &&
270 n2 == 2 + nDigits && tok2[0] == '<' && tok2[n2 - 1] == '>')) {
271 error(-1, "Illegal entry in bfrange block in ToUnicode CMap");
272 continue;
274 tok1[n1 - 1] = tok2[n2 - 1] = '\0';
275 if (sscanf(tok1 + 1, "%x", &code1) != 1 ||
276 sscanf(tok2 + 1, "%x", &code2) != 1) {
277 error(-1, "Illegal entry in bfrange block in ToUnicode CMap");
278 continue;
280 if (!strcmp(tok3, "[")) {
281 i = 0;
282 while (pst->getToken(tok1, sizeof(tok1), &n1) &&
283 code1 + i <= code2) {
284 if (!strcmp(tok1, "]")) {
285 break;
287 if (tok1[0] == '<' && tok1[n1 - 1] == '>') {
288 tok1[n1 - 1] = '\0';
289 addMapping(code1 + i, tok1 + 1, n1 - 2, 0);
290 } else {
291 error(-1, "Illegal entry in bfrange block in ToUnicode CMap");
293 ++i;
295 } else if (tok3[0] == '<' && tok3[n3 - 1] == '>') {
296 tok3[n3 - 1] = '\0';
297 for (i = 0; code1 <= code2; ++code1, ++i) {
298 addMapping(code1, tok3 + 1, n3 - 2, i);
301 } else {
302 error(-1, "Illegal entry in bfrange block in ToUnicode CMap");
305 pst->getToken(tok1, sizeof(tok1), &n1);
306 } else {
307 strcpy(tok1, tok2);
310 delete pst;
313 void CharCodeToUnicode::addMapping(CharCode code, char *uStr, int n,
314 int offset) {
315 CharCode oldLen, i;
316 Unicode u;
317 char uHex[5];
318 int j;
320 if (code >= mapLen) {
321 oldLen = mapLen;
322 mapLen = (code + 256) & ~255;
323 map = (Unicode *)grealloc(map, mapLen * sizeof(Unicode));
324 for (i = oldLen; i < mapLen; ++i) {
325 map[i] = 0;
328 if (n <= 4) {
329 if (sscanf(uStr, "%x", &u) != 1) {
330 error(-1, "Illegal entry in ToUnicode CMap");
331 return;
333 map[code] = u + offset;
334 } else {
335 if (sMapLen >= sMapSize) {
336 sMapSize = sMapSize + 16;
337 sMap = (CharCodeToUnicodeString *)
338 grealloc(sMap, sMapSize * sizeof(CharCodeToUnicodeString));
340 map[code] = 0;
341 sMap[sMapLen].c = code;
342 sMap[sMapLen].len = n / 4;
343 for (j = 0; j < sMap[sMapLen].len && j < maxUnicodeString; ++j) {
344 strncpy(uHex, uStr + j*4, 4);
345 uHex[4] = '\0';
346 if (sscanf(uHex, "%x", &sMap[sMapLen].u[j]) != 1) {
347 error(-1, "Illegal entry in ToUnicode CMap");
350 sMap[sMapLen].u[sMap[sMapLen].len - 1] += offset;
351 ++sMapLen;
355 CharCodeToUnicode::CharCodeToUnicode(GString *tagA) {
356 CharCode i;
358 tag = tagA;
359 mapLen = 256;
360 map = (Unicode *)gmalloc(mapLen * sizeof(Unicode));
361 for (i = 0; i < mapLen; ++i) {
362 map[i] = 0;
364 sMap = NULL;
365 sMapLen = sMapSize = 0;
366 refCnt = 1;
367 #if MULTITHREADED
368 gInitMutex(&mutex);
369 #endif
372 CharCodeToUnicode::CharCodeToUnicode(GString *tagA, Unicode *mapA,
373 CharCode mapLenA, GBool copyMap,
374 CharCodeToUnicodeString *sMapA,
375 int sMapLenA, int sMapSizeA) {
376 tag = tagA;
377 mapLen = mapLenA;
378 if (copyMap) {
379 map = (Unicode *)gmalloc(mapLen * sizeof(Unicode));
380 memcpy(map, mapA, mapLen * sizeof(Unicode));
381 } else {
382 map = mapA;
384 sMap = sMapA;
385 sMapLen = sMapLenA;
386 sMapSize = sMapSizeA;
387 refCnt = 1;
388 #if MULTITHREADED
389 gInitMutex(&mutex);
390 #endif
393 CharCodeToUnicode::~CharCodeToUnicode() {
394 if (tag) {
395 delete tag;
397 gfree(map);
398 if (sMap) {
399 gfree(sMap);
401 #if MULTITHREADED
402 gDestroyMutex(&mutex);
403 #endif
406 void CharCodeToUnicode::incRefCnt() {
407 #if MULTITHREADED
408 gLockMutex(&mutex);
409 #endif
410 ++refCnt;
411 #if MULTITHREADED
412 gUnlockMutex(&mutex);
413 #endif
416 void CharCodeToUnicode::decRefCnt() {
417 GBool done;
419 #if MULTITHREADED
420 gLockMutex(&mutex);
421 #endif
422 done = --refCnt == 0;
423 #if MULTITHREADED
424 gUnlockMutex(&mutex);
425 #endif
426 if (done) {
427 delete this;
431 GBool CharCodeToUnicode::match(GString *tagA) {
432 return tag && !tag->cmp(tagA);
435 void CharCodeToUnicode::setMapping(CharCode c, Unicode *u, int len) {
436 int i;
438 if (len == 1) {
439 map[c] = u[0];
440 } else {
441 map[c] = 0;
442 if (sMapLen == sMapSize) {
443 sMapSize += 8;
444 sMap = (CharCodeToUnicodeString *)
445 grealloc(sMap, sMapSize * sizeof(CharCodeToUnicodeString));
447 sMap[sMapLen].c = c;
448 sMap[sMapLen].len = len;
449 for (i = 0; i < len && i < maxUnicodeString; ++i) {
450 sMap[sMapLen].u[i] = u[i];
452 ++sMapLen;
456 int CharCodeToUnicode::mapToUnicode(CharCode c, Unicode *u, int size) {
457 int i, j;
459 if (c >= mapLen) {
460 return 0;
462 if (map[c]) {
463 u[0] = map[c];
464 return 1;
466 for (i = 0; i < sMapLen; ++i) {
467 if (sMap[i].c == c) {
468 for (j = 0; j < sMap[i].len && j < size; ++j) {
469 u[j] = sMap[i].u[j];
471 return j;
474 return 0;
477 //------------------------------------------------------------------------
479 CharCodeToUnicodeCache::CharCodeToUnicodeCache(int sizeA) {
480 int i;
482 size = sizeA;
483 cache = (CharCodeToUnicode **)gmalloc(size * sizeof(CharCodeToUnicode *));
484 for (i = 0; i < size; ++i) {
485 cache[i] = NULL;
489 CharCodeToUnicodeCache::~CharCodeToUnicodeCache() {
490 int i;
492 for (i = 0; i < size; ++i) {
493 if (cache[i]) {
494 cache[i]->decRefCnt();
497 gfree(cache);
500 CharCodeToUnicode *CharCodeToUnicodeCache::getCharCodeToUnicode(GString *tag) {
501 CharCodeToUnicode *ctu;
502 int i, j;
504 if (cache[0] && cache[0]->match(tag)) {
505 cache[0]->incRefCnt();
506 return cache[0];
508 for (i = 1; i < size; ++i) {
509 if (cache[i] && cache[i]->match(tag)) {
510 ctu = cache[i];
511 for (j = i; j >= 1; --j) {
512 cache[j] = cache[j - 1];
514 cache[0] = ctu;
515 ctu->incRefCnt();
516 return ctu;
519 return NULL;
522 void CharCodeToUnicodeCache::add(CharCodeToUnicode *ctu) {
523 int i;
525 if (cache[size - 1]) {
526 cache[size - 1]->decRefCnt();
528 for (i = size - 1; i >= 1; --i) {
529 cache[i] = cache[i - 1];
531 cache[0] = ctu;
532 ctu->incRefCnt();