fixed windows access violation which occurs if one tries to retrieve
[swftools.git] / pdf2swf / xpdf / CharCodeToUnicode.cc
blob912981e99e96f6562ad9a53c77aef56e043457a5
1 //========================================================================
2 //
3 // CharCodeToUnicode.cc
4 //
5 // Copyright 2001-2002 Glyph & Cog, LLC
6 //
7 //========================================================================
9 #ifdef __GNUC__
10 #pragma implementation
11 #endif
13 #include <aconf.h>
14 #include <stdio.h>
15 #include <string.h>
16 #include "gmem.h"
17 #include "gfile.h"
18 #include "GString.h"
19 #include "Error.h"
20 #include "GlobalParams.h"
21 #include "PSTokenizer.h"
22 #include "CharCodeToUnicode.h"
24 //------------------------------------------------------------------------
26 #define maxUnicodeString 8
28 struct CharCodeToUnicodeString {
29 CharCode c;
30 Unicode u[maxUnicodeString];
31 int len;
34 //------------------------------------------------------------------------
// Character source that reads from an in-memory, NUL-terminated string.
// It is handed to the tokenizer as a getChar callback.
//
// data points to a char * cursor.  Each call returns the byte under the
// cursor and advances the cursor by one.  Once the cursor sits on the
// terminating NUL the function returns EOF and leaves the cursor where
// it is, so repeated calls keep returning EOF.
static int getCharFromString(void *data) {
  char **cursor = (char **)data;
  char *p = *cursor;

  if (!*p) {
    return EOF;
  }
  *cursor = p + 1;
  // Go through unsigned char, exactly as fgetc() does.  Where plain char
  // is signed, a byte >= 0x80 would otherwise be returned as a negative
  // int, and 0xFF would be indistinguishable from EOF.
  return (unsigned char)*p;
}
// Character source that reads from a stdio stream: data is the FILE *,
// and the result is whatever fgetc() returns for it.
static int getCharFromFile(void *data) {
  FILE *stream = (FILE *)data;
  return fgetc(stream);
}
54 //------------------------------------------------------------------------
56 CharCodeToUnicode *CharCodeToUnicode::parseCIDToUnicode(GString *collectionA) {
57 FILE *f;
58 Unicode *mapA;
59 CharCode size, mapLenA;
60 char buf[64];
61 Unicode u;
62 CharCodeToUnicode *ctu;
64 if (!(f = globalParams->getCIDToUnicodeFile(collectionA))) {
65 error(-1, "Couldn't find cidToUnicode file for the '%s' collection",
66 collectionA->getCString());
67 return NULL;
70 size = 32768;
71 mapA = (Unicode *)gmalloc(size * sizeof(Unicode));
72 mapLenA = 0;
74 while (getLine(buf, sizeof(buf), f)) {
75 if (mapLenA == size) {
76 size *= 2;
77 mapA = (Unicode *)grealloc(mapA, size * sizeof(Unicode));
79 if (sscanf(buf, "%x", &u) == 1) {
80 mapA[mapLenA] = u;
81 } else {
82 error(-1, "Bad line (%d) in cidToUnicode file for the '%s' collection",
83 (int)(mapLenA + 1), collectionA->getCString());
84 mapA[mapLenA] = 0;
86 ++mapLenA;
89 ctu = new CharCodeToUnicode(collectionA->copy(), mapA, mapLenA, gTrue,
90 NULL, 0);
91 gfree(mapA);
92 return ctu;
95 CharCodeToUnicode *CharCodeToUnicode::make8BitToUnicode(Unicode *toUnicode) {
96 return new CharCodeToUnicode(NULL, toUnicode, 256, gTrue, NULL, 0);
99 CharCodeToUnicode *CharCodeToUnicode::parseCMap(GString *buf, int nBits) {
100 CharCodeToUnicode *ctu;
101 char *p;
103 ctu = new CharCodeToUnicode(NULL);
104 p = buf->getCString();
105 ctu->parseCMap1(&getCharFromString, &p, nBits);
106 return ctu;
109 void CharCodeToUnicode::parseCMap1(int (*getCharFunc)(void *), void *data,
110 int nBits) {
111 PSTokenizer *pst;
112 char tok1[256], tok2[256], tok3[256];
113 int nDigits, n1, n2, n3;
114 CharCode oldLen, i;
115 CharCode code1, code2;
116 Unicode u;
117 char uHex[5];
118 int j;
119 GString *name;
120 FILE *f;
122 nDigits = nBits / 4;
123 pst = new PSTokenizer(getCharFunc, data);
124 pst->getToken(tok1, sizeof(tok1), &n1);
125 while (pst->getToken(tok2, sizeof(tok2), &n2)) {
126 if (!strcmp(tok2, "usecmap")) {
127 if (tok1[0] == '/') {
128 name = new GString(tok1 + 1);
129 if ((f = globalParams->findToUnicodeFile(name))) {
130 parseCMap1(&getCharFromFile, f, nBits);
131 fclose(f);
132 } else {
133 error(-1, "Couldn't find ToUnicode CMap file for '%s'",
134 name->getCString());
136 delete name;
138 pst->getToken(tok1, sizeof(tok1), &n1);
139 } else if (!strcmp(tok2, "beginbfchar")) {
140 while (pst->getToken(tok1, sizeof(tok1), &n1)) {
141 if (!strcmp(tok1, "endbfchar")) {
142 break;
144 if (!pst->getToken(tok2, sizeof(tok2), &n2) ||
145 !strcmp(tok2, "endbfchar")) {
146 error(-1, "Illegal entry in bfchar block in ToUnicode CMap");
147 break;
149 if (!(n1 == 2 + nDigits && tok1[0] == '<' && tok1[n1 - 1] == '>' &&
150 tok2[0] == '<' && tok2[n2 - 1] == '>')) {
151 error(-1, "Illegal entry in bfchar block in ToUnicode CMap");
152 continue;
154 tok1[n1 - 1] = tok2[n2 - 1] = '\0';
155 if (sscanf(tok1 + 1, "%x", &code1) != 1) {
156 error(-1, "Illegal entry in bfchar block in ToUnicode CMap");
157 continue;
159 if (code1 >= mapLen) {
160 oldLen = mapLen;
161 mapLen = (code1 + 256) & ~255;
162 map = (Unicode *)grealloc(map, mapLen * sizeof(Unicode));
163 for (i = oldLen; i < mapLen; ++i) {
164 map[i] = 0;
167 if (n2 == 6) {
168 if (sscanf(tok2 + 1, "%x", &u) != 1) {
169 error(-1, "Illegal entry in bfchar block in ToUnicode CMap");
170 continue;
172 map[code1] = u;
173 } else {
174 map[code1] = 0;
175 if (sMapLen == sMapSize) {
176 sMapSize += 8;
177 sMap = (CharCodeToUnicodeString *)
178 grealloc(sMap, sMapSize * sizeof(CharCodeToUnicodeString));
180 sMap[sMapLen].c = code1;
181 sMap[sMapLen].len = (n2 - 2) / 4;
182 for (j = 0; j < sMap[sMapLen].len && j < maxUnicodeString; ++j) {
183 strncpy(uHex, tok2 + 1 + j*4, 4);
184 uHex[4] = '\0';
185 if (sscanf(uHex, "%x", &sMap[sMapLen].u[j]) != 1) {
186 error(-1, "Illegal entry in bfchar block in ToUnicode CMap");
189 ++sMapLen;
192 pst->getToken(tok1, sizeof(tok1), &n1);
193 } else if (!strcmp(tok2, "beginbfrange")) {
194 while (pst->getToken(tok1, sizeof(tok1), &n1)) {
195 if (!strcmp(tok1, "endbfrange")) {
196 break;
198 if (!pst->getToken(tok2, sizeof(tok2), &n2) ||
199 !strcmp(tok2, "endbfrange") ||
200 !pst->getToken(tok3, sizeof(tok3), &n3) ||
201 !strcmp(tok3, "endbfrange")) {
202 error(-1, "Illegal entry in bfrange block in ToUnicode CMap");
203 break;
205 if (!(n1 == 2 + nDigits && tok1[0] == '<' && tok1[n1 - 1] == '>' &&
206 n2 == 2 + nDigits && tok2[0] == '<' && tok2[n2 - 1] == '>' &&
207 tok3[0] == '<' && tok3[n3 - 1] == '>')) {
208 error(-1, "Illegal entry in bfrange block in ToUnicode CMap");
209 continue;
211 tok1[n1 - 1] = tok2[n2 - 1] = tok3[n3 - 1] = '\0';
212 if (sscanf(tok1 + 1, "%x", &code1) != 1 ||
213 sscanf(tok2 + 1, "%x", &code2) != 1) {
214 error(-1, "Illegal entry in bfrange block in ToUnicode CMap");
215 continue;
217 if (code2 >= mapLen) {
218 oldLen = mapLen;
219 mapLen = (code2 + 256) & ~255;
220 map = (Unicode *)grealloc(map, mapLen * sizeof(Unicode));
221 for (i = oldLen; i < mapLen; ++i) {
222 map[i] = 0;
225 if (n3 == 6) {
226 if (sscanf(tok3 + 1, "%x", &u) != 1) {
227 error(-1, "Illegal entry in bfrange block in ToUnicode CMap");
228 continue;
230 for (; code1 <= code2; ++code1) {
231 map[code1] = u++;
233 } else {
234 if (sMapLen + (int)(code2 - code1 + 1) > sMapSize) {
235 sMapSize = (sMapSize + (code2 - code1 + 1) + 7) & ~7;
236 sMap = (CharCodeToUnicodeString *)
237 grealloc(sMap, sMapSize * sizeof(CharCodeToUnicodeString));
239 for (i = 0; code1 <= code2; ++code1, ++i) {
240 map[code1] = 0;
241 sMap[sMapLen].c = code1;
242 sMap[sMapLen].len = (n3 - 2) / 4;
243 for (j = 0; j < sMap[sMapLen].len && j < maxUnicodeString; ++j) {
244 strncpy(uHex, tok3 + 1 + j*4, 4);
245 uHex[4] = '\0';
246 if (sscanf(uHex, "%x", &sMap[sMapLen].u[j]) != 1) {
247 error(-1, "Illegal entry in bfrange block in ToUnicode CMap");
250 sMap[sMapLen].u[sMap[sMapLen].len - 1] += i;
251 ++sMapLen;
255 pst->getToken(tok1, sizeof(tok1), &n1);
256 } else {
257 strcpy(tok1, tok2);
260 delete pst;
263 CharCodeToUnicode::CharCodeToUnicode(GString *collectionA) {
264 CharCode i;
266 collection = collectionA;
267 mapLen = 256;
268 map = (Unicode *)gmalloc(mapLen * sizeof(Unicode));
269 for (i = 0; i < mapLen; ++i) {
270 map[i] = 0;
272 sMap = NULL;
273 sMapLen = sMapSize = 0;
274 refCnt = 1;
277 CharCodeToUnicode::CharCodeToUnicode(GString *collectionA, Unicode *mapA,
278 CharCode mapLenA, GBool copyMap,
279 CharCodeToUnicodeString *sMapA,
280 int sMapLenA) {
281 collection = collectionA;
282 mapLen = mapLenA;
283 if (copyMap) {
284 map = (Unicode *)gmalloc(mapLen * sizeof(Unicode));
285 memcpy(map, mapA, mapLen * sizeof(Unicode));
286 } else {
287 map = mapA;
289 sMap = sMapA;
290 sMapLen = sMapSize = sMapLenA;
291 refCnt = 1;
294 CharCodeToUnicode::~CharCodeToUnicode() {
295 if (collection) {
296 delete collection;
298 gfree(map);
299 if (sMap) {
300 gfree(sMap);
304 void CharCodeToUnicode::incRefCnt() {
305 ++refCnt;
308 void CharCodeToUnicode::decRefCnt() {
309 if (--refCnt == 0) {
310 delete this;
314 GBool CharCodeToUnicode::match(GString *collectionA) {
315 return collection && !collection->cmp(collectionA);
318 int CharCodeToUnicode::mapToUnicode(CharCode c, Unicode *u, int size) {
319 int i, j;
321 if (c >= mapLen) {
322 return 0;
324 if (map[c]) {
325 u[0] = map[c];
326 return 1;
328 for (i = 0; i < sMapLen; ++i) {
329 if (sMap[i].c == c) {
330 for (j = 0; j < sMap[i].len && j < size; ++j) {
331 u[j] = sMap[i].u[j];
333 return j;
336 return 0;
339 //------------------------------------------------------------------------
341 CIDToUnicodeCache::CIDToUnicodeCache() {
342 int i;
344 for (i = 0; i < cidToUnicodeCacheSize; ++i) {
345 cache[i] = NULL;
349 CIDToUnicodeCache::~CIDToUnicodeCache() {
350 int i;
352 for (i = 0; i < cidToUnicodeCacheSize; ++i) {
353 if (cache[i]) {
354 cache[i]->decRefCnt();
359 CharCodeToUnicode *CIDToUnicodeCache::getCIDToUnicode(GString *collection) {
360 CharCodeToUnicode *ctu;
361 int i, j;
363 if (cache[0] && cache[0]->match(collection)) {
364 cache[0]->incRefCnt();
365 return cache[0];
367 for (i = 1; i < cidToUnicodeCacheSize; ++i) {
368 if (cache[i] && cache[i]->match(collection)) {
369 ctu = cache[i];
370 for (j = i; j >= 1; --j) {
371 cache[j] = cache[j - 1];
373 cache[0] = ctu;
374 ctu->incRefCnt();
375 return ctu;
378 if ((ctu = CharCodeToUnicode::parseCIDToUnicode(collection))) {
379 if (cache[cidToUnicodeCacheSize - 1]) {
380 cache[cidToUnicodeCacheSize - 1]->decRefCnt();
382 for (j = cidToUnicodeCacheSize - 1; j >= 1; --j) {
383 cache[j] = cache[j - 1];
385 cache[0] = ctu;
386 ctu->incRefCnt();
387 return ctu;
389 return NULL;