1 /*************************************************************************
3 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
5 * Copyright 2008 by Sun Microsystems, Inc.
7 * OpenOffice.org - a multi-platform office productivity suite
9 * $RCSfile: tokens.c,v $
12 * This file is part of OpenOffice.org.
14 * OpenOffice.org is free software: you can redistribute it and/or modify
15 * it under the terms of the GNU Lesser General Public License version 3
16 * only, as published by the Free Software Foundation.
18 * OpenOffice.org is distributed in the hope that it will be useful,
19 * but WITHOUT ANY WARRANTY; without even the implied warranty of
20 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
21 * GNU Lesser General Public License version 3 for more details
22 * (a copy is included in the LICENSE file that accompanied this code).
24 * You should have received a copy of the GNU Lesser General Public License
25 * version 3 along with OpenOffice.org. If not, see
26 * <http://www.openoffice.org/license.html>
27 * for a copy of the LGPLv3 License.
29 ************************************************************************/
34 #if (defined(_WIN32) || defined(_MSDOS) || defined(__IBMC__))
/*
 * Output staging buffer of 4 * OBS bytes. The visible code in puttokens
 * compares wbp against &wbuf[OBS], so OBS is presumably the flush threshold
 * and the remaining space is headroom -- confirm against the full file.
 */
42 static char wbuf
[4 * OBS
];
/* Current write position inside wbuf; starts at the beginning of the buffer. */
43 static char *wbp
= wbuf
;
/*
 * Set to 1 by puttokens when the current token compares equal to "extern";
 * cleared again when the following token is handled.
 */
44 static int EBCDIC_ExternTokenDetected
= 0;
/* Set to 1 by puttokens when the NAME token "__ToLatin1__" is seen. */
45 static int EBCDIC_StartTokenDetected
= 0;
/*
 * 256-entry byte translation table, indexed by an unsigned character code.
 * The entries are consistent with an EBCDIC to Latin-1 mapping, e.g.
 * index 0x40 -> 0x20 (space), index 0xC1 -> 0x41 ('A'), index 0xF0 -> 0x30 ('0').
 * memcpy_EBCDIC looks characters up here before formatting them with MASK.
 * NOTE(review): the braces around the initializer list are missing from this
 * listing; the values themselves add up to exactly 256 entries.
 */
47 unsigned char toLatin1
[256] =
49 0x00, 0x01, 0x02, 0x03, 0x9c, 0x09, 0x86, 0x7f, 0x97, 0x8d,
50 0x8e, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f, 0x10, 0x11, 0x12, 0x13,
51 0x9d, 0x0a, 0x08, 0x87, 0x18, 0x19, 0x92, 0x8f, 0x1c, 0x1d,
52 0x1e, 0x1f, 0x80, 0x81, 0x82, 0x83, 0x84, 0x85, 0x17, 0x1b,
53 0x88, 0x89, 0x8a, 0x8b, 0x8c, 0x05, 0x06, 0x07, 0x90, 0x91,
54 0x16, 0x93, 0x94, 0x95, 0x96, 0x04, 0x98, 0x99, 0x9a, 0x9b,
55 0x14, 0x15, 0x9e, 0x1a, 0x20, 0xa0, 0xe2, 0xe4, 0xe0, 0xe1,
56 0xe3, 0xe5, 0xe7, 0xf1, 0xa2, 0x2e, 0x3c, 0x28, 0x2b, 0x7c,
57 0x26, 0xe9, 0xea, 0xeb, 0xe8, 0xed, 0xee, 0xef, 0xec, 0xdf,
58 0x21, 0x24, 0x2a, 0x29, 0x3b, 0x5e, 0x2d, 0x2f, 0xc2, 0xc4,
59 0xc0, 0xc1, 0xc3, 0xc5, 0xc7, 0xd1, 0xa6, 0x2c, 0x25, 0x5f,
60 0x3e, 0x3f, 0xf8, 0xc9, 0xca, 0xcb, 0xc8, 0xcd, 0xce, 0xcf,
61 0xcc, 0x60, 0x3a, 0x23, 0x40, 0x27, 0x3d, 0x22,
62 0xd8, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67, 0x68, 0x69,
63 0xab, 0xbb, 0xf0, 0xfd, 0xfe, 0xb1, 0xb0, 0x6a, 0x6b, 0x6c,
64 0x6d, 0x6e, 0x6f, 0x70, 0x71, 0x72, 0xaa, 0xba, 0xe6, 0xb8,
65 0xc6, 0xa4, 0xb5, 0x7e, 0x73, 0x74, 0x75, 0x76, 0x77, 0x78,
66 0x79, 0x7a, 0xa1, 0xbf, 0xd0, 0x5b, 0xde, 0xae, 0xac, 0xa3,
67 0xa5, 0xb7, 0xa9, 0xa7, 0xb6, 0xbc, 0xbd, 0xbe, 0xdd, 0xa8,
68 0xaf, 0x5d, 0xb4, 0xd7, 0x7b, 0x41, 0x42, 0x43, 0x44, 0x45,
69 0x46, 0x47, 0x48, 0x49, 0xad, 0xf4, 0xf6, 0xf2, 0xf3, 0xf5,
70 0x7d, 0x4a, 0x4b, 0x4c, 0x4d, 0x4e, 0x4f, 0x50, 0x51, 0x52,
71 0xb9, 0xfb, 0xfc, 0xf9, 0xfa, 0xff, 0x5c, 0xf7, 0x53, 0x54,
72 0x55, 0x56, 0x57, 0x58, 0x59, 0x5a, 0xb2, 0xd4, 0xd6, 0xd2,
73 0xd3, 0xd5, 0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36, 0x37,
74 0x38, 0x39, 0xb3, 0xdb, 0xdc, 0xd9, 0xda, 0x9f
/*
 * memcpy_EBCDIC: copy the quoted token text p (len bytes) into pwbuf while
 * passing the characters between the quotes through the toLatin1 table.
 *
 *   pwbuf  destination buffer, filled sequentially through the index currpos
 *   p      source bytes; everything up to and including the first " or ' is
 *          copied unchanged
 *   len    number of source bytes; the conversion loop stops at len - 1 so
 *          that the final byte (the closing quote) is copied unchanged
 *
 * NOTE(review): this listing omits lines of the function (return type, the
 * declaration of currpos, the case labels of the switch, braces and the return
 * statement). puttokens assigns the result to its len variable, so the return
 * value is presumably the number of bytes written -- confirm in the full file.
 * NOTE(review): all writes to pwbuf, including the sprintf calls, are
 * unbounded; the caller has to guarantee enough room. MASK is defined outside
 * this view.
 */
80 memcpy_EBCDIC( char * pwbuf
, uchar
*p
, int len
)
83 int processedchars
= 0;
94 /* copy spaces until " or ' */
/* NOTE(review): this scan does not check len; it relies on p containing a
 * quote character (puttokens only calls this for STRING and CCON tokens). */
95 while( (p
[ processedchars
] != '\"') && (p
[ processedchars
] != '\'') )
96 pwbuf
[ currpos
++ ] = p
[ processedchars
++ ];
98 /* copy first " or ' */
99 pwbuf
[ currpos
++ ] = p
[ processedchars
++ ];
101 /* convert all characters until " or ' */
102 while( processedchars
< (len
- 1) )
/* A backslash starts an escape sequence; the character after it is examined
 * by the switch below (the pre-increment skips the backslash itself). */
104 if( p
[ processedchars
] == '\\' )
106 switch( p
[ ++processedchars
] )
/* Each sprintf below emits the toLatin1 value of one simple escape character
 * (newline, tab, vertical tab, backspace, carriage return, form feed, alert,
 * backslash, question mark, single quote, double quote), formatted with MASK,
 * and advances currpos by the number of characters sprintf produced.
 * The case labels that select them are missing from this listing. */
109 currpos
+= sprintf( &pwbuf
[ currpos
], MASK
, toLatin1
['\n'] );
114 currpos
+= sprintf( &pwbuf
[ currpos
], MASK
, toLatin1
['\t'] );
119 currpos
+= sprintf( &pwbuf
[ currpos
], MASK
, toLatin1
['\v'] );
124 currpos
+= sprintf( &pwbuf
[ currpos
], MASK
, toLatin1
['\b'] );
129 currpos
+= sprintf( &pwbuf
[ currpos
], MASK
, toLatin1
['\r'] );
134 currpos
+= sprintf( &pwbuf
[ currpos
], MASK
, toLatin1
['\f'] );
139 currpos
+= sprintf( &pwbuf
[ currpos
], MASK
, toLatin1
['\a'] );
144 currpos
+= sprintf( &pwbuf
[ currpos
], MASK
, toLatin1
['\\'] );
149 currpos
+= sprintf( &pwbuf
[ currpos
], MASK
, toLatin1
['\?'] );
154 currpos
+= sprintf( &pwbuf
[ currpos
], MASK
, toLatin1
['\''] );
159 currpos
+= sprintf( &pwbuf
[ currpos
], MASK
, toLatin1
['\"'] );
163 /* octal coded character? -> copy */
/* The escape is copied untranslated: a backslash, then octal digits while
 * currpos < startpos + 4, i.e. at most three digits after the backslash. */
173 int startpos
= currpos
;
175 pwbuf
[ currpos
++ ] = '\\';
177 while( p
[ processedchars
] >= '0' && p
[ processedchars
] <= '7' && (currpos
< startpos
+ 4) )
178 pwbuf
[ currpos
++ ] = (unsigned char)p
[ processedchars
++ ];
182 /* hex coded character? -> copy */
/* Also copied untranslated: backslash and 'x' are written first, so the
 * currpos < startpos + 4 limit allows at most two hex digits after them. */
186 int startpos
= currpos
;
188 pwbuf
[ currpos
++ ] = '\\';
189 pwbuf
[ currpos
++ ] = 'x';
192 while( isxdigit( p
[ processedchars
] ) && (currpos
< startpos
+ 4) )
193 pwbuf
[ currpos
++ ] = (unsigned char)p
[ processedchars
++ ];
/* Translate one source character through toLatin1 and emit it formatted with
 * MASK. Presumably this is the branch for characters that are not part of an
 * escape sequence; the enclosing else is not visible in this listing. */
200 currpos
+= sprintf( &pwbuf
[ currpos
], MASK
, toLatin1
[p
[ processedchars
++ ]] );
204 /* copy last " or ' */
205 pwbuf
[ currpos
++ ] = p
[ processedchars
];
/*
 * maketokenrow: set up the token row trp with storage for size tokens.
 * The visible statement allocates size * sizeof(Token) bytes through domalloc
 * and stores the result in trp->bp.
 * NOTE(review): the remaining lines of the function (return type, braces and
 * any initialization of the other Tokenrow fields) are missing from this
 * listing, so nothing is stated here about them.
 */
211 maketokenrow(int size
, Tokenrow
* trp
)
215 trp
->bp
= (Token
*) domalloc(size
* sizeof(Token
));
/*
 * growtokenrow: enlarge the token array of trp.
 * The capacity trp->max becomes 3 * max / 2 + 1 and the array at trp->bp is
 * reallocated to that many Tokens. Because realloc may move the array, the
 * positions of trp->tp and trp->lp are saved as element offsets first and the
 * pointers are rebuilt from the new base afterwards.
 * NOTE(review): the return type and return statement are missing from this
 * listing.
 * NOTE(review): the realloc result is stored straight into trp->bp without a
 * NULL check; on allocation failure the old array is leaked and lp/tp are
 * derived from a null pointer.
 */
223 growtokenrow(Tokenrow
* trp
)
/* Remember the cursor (tp) and end (lp) positions as offsets from the base. */
225 int ncur
= trp
->tp
- trp
->bp
;
226 int nlast
= trp
->lp
- trp
->bp
;
228 trp
->max
= 3 * trp
->max
/ 2 + 1;
229 trp
->bp
= (Token
*) realloc(trp
->bp
, trp
->max
* sizeof(Token
));
/* Re-anchor both pointers in the (possibly moved) array. */
230 trp
->lp
= &trp
->bp
[nlast
];
231 trp
->tp
= &trp
->bp
[ncur
];
236 * Compare a row of tokens, ignoring the content of WS; return !=0 if different
/*
 * comparetokens: compare the token rows tr1 and tr2.
 * The visible checks are: the number of tokens from tp1/tp2 to the end of each
 * row must match, and for every token pair the type, the presence of leading
 * whitespace (wslen == 0 or not), the length and the first len bytes of text
 * must match.
 * NOTE(review): the declarations and initial values of tp1 and tp2 and all
 * return statements are missing from this listing.
 */
239 comparetokens(Tokenrow
* tr1
, Tokenrow
* tr2
)
/* Different number of remaining tokens. */
245 if (tr1
->lp
- tp1
!= tr2
->lp
- tp2
)
/* Walk both rows in lockstep; only tp1 is bounds-checked because the token
 * counts were compared above. */
247 for (; tp1
< tr1
->lp
; tp1
++, tp2
++)
/* Whitespace is compared only as present/absent, not by content or length. */
249 if (tp1
->type
!= tp2
->type
250 || (tp1
->wslen
== 0) != (tp2
->wslen
== 0)
251 || tp1
->len
!= tp2
->len
252 || strncmp((char *) tp1
->t
, (char *) tp2
->t
, tp1
->len
) != 0)
259 * replace ntok tokens starting at dtr->tp with the contents of str.
260 * tp ends up pointing just beyond the replacement.
261 * Canonical whitespace is assured on each side.
/*
 * insertrow: put the tokens of str into dtr in place of ntok existing tokens.
 * Visible steps: nrtok is the token count of str (rowlen), adjustrow is called
 * with the size difference nrtok - ntok to open or close the gap, and
 * movetokenrow copies the tokens of str to dtr->tp.
 * NOTE(review): several lines of the function are missing from this listing
 * (return type, braces, any adjustment of dtr->tp and any whitespace handling),
 * so the exact position of dtr->tp during these calls cannot be stated here.
 */
264 insertrow(Tokenrow
* dtr
, int ntok
, Tokenrow
* str
)
266 int nrtok
= rowlen(str
);
/* Shift the tail of dtr by the difference between new and replaced tokens. */
269 adjustrow(dtr
, nrtok
- ntok
);
271 movetokenrow(dtr
, str
);
276 * make sure there is WS before trp->tp, if tokens might merge in the output
/*
 * makespace: give the token tp the leading whitespace of the token ntp.
 * Visible steps: newstring allocates a copy of tp's text with ntp->wslen spare
 * bytes in front, the whitespace bytes that precede ntp->t are copied into
 * that spare area, tp->t is pointed just past them (at the copied text) and
 * tp->wslen takes over ntp->wslen.
 * NOTE(review): the declarations of tp and tt, the way tp is derived from trp
 * and the conditions guarding these statements are missing from this listing.
 */
279 makespace(Tokenrow
* trp
, Token
* ntp
)
289 tt
= newstring(tp
->t
, tp
->len
, ntp
->wslen
);
/* The whitespace of a token is stored directly in front of its text, hence
 * the source address ntp->t - ntp->wslen. */
290 strncpy((char *)tt
, (char *)ntp
->t
- ntp
->wslen
, ntp
->wslen
);
291 tp
->t
= tt
+ ntp
->wslen
;
292 tp
->wslen
= ntp
->wslen
;
298 * Copy an entire tokenrow into another, at tp.
299 * It is assumed that there is enough space.
300 * Not strictly conforming.
/*
 * movetokenrow: copy all tokens of str (from str->bp up to str->lp) to the
 * position dtr->tp.
 * The byte count is computed from the char-pointer distance between str->lp
 * and str->bp, and memmove is used for the copy. No capacity check on dtr is
 * visible here.
 * NOTE(review): the return type, the declaration of nby and the rest of the
 * function are missing from this listing.
 */
303 movetokenrow(Tokenrow
* dtr
, Tokenrow
* str
)
307 /* nby = sizeof(Token) * (str->lp - str->bp); */
308 nby
= (char *) str
->lp
- (char *) str
->bp
;
309 memmove(dtr
->tp
, str
->bp
, nby
);
313 * Move the tokens in a row, starting at tr->tp, rightward by nt tokens;
314 * nt may be negative (left move).
315 * The row may need to be grown.
316 * Non-strictly conforming because of the (char *), but easily fixed
/*
 * adjustrow: shift the tokens from trp->tp to trp->lp by nt token positions.
 * Visible steps: the required size is the current token count plus nt; a loop
 * runs while that size exceeds trp->max; then the bytes between trp->tp and
 * trp->lp are moved to trp->tp + nt with memmove, which tolerates the
 * overlapping source and destination.
 * NOTE(review): the body of the while loop is missing from this listing --
 * presumably it grows the row (see growtokenrow); confirm in the full file.
 * The declarations of size and nby and any update of trp->lp are not visible
 * either.
 */
319 adjustrow(Tokenrow
* trp
, int nt
)
325 size
= (trp
->lp
- trp
->bp
) + nt
;
326 while (size
> trp
->max
)
328 /* nby = sizeof(Token) * (trp->lp - trp->tp); */
329 nby
= (char *) trp
->lp
- (char *) trp
->tp
;
331 memmove(trp
->tp
+ nt
, trp
->tp
, nby
);
336 * Copy a row of tokens into the destination holder, allocating
337 * the space for the contents. Return the destination.
/*
 * copytokenrow: build dtr as a copy of the tokens of str.
 * Visible steps: len is the token count of str (rowlen), maketokenrow sets up
 * dtr with room for len tokens and movetokenrow copies the Token structures
 * of str to dtr->tp.
 * NOTE(review): the return type, the return statement and any update of
 * dtr->lp are missing from this listing.
 */
340 copytokenrow(Tokenrow
* dtr
, Tokenrow
* str
)
342 int len
= rowlen(str
);
344 maketokenrow(len
, dtr
);
345 movetokenrow(dtr
, str
);
351 * Produce a copy of a row of tokens. Start at trp->tp.
352 * The value strings are copied as well. The first token
/*
 * normtokenrow: create a new Tokenrow holding the tokens of trp from trp->tp
 * to trp->lp.
 * Visible steps: a Tokenrow is obtained with new(Tokenrow), it is set up by
 * maketokenrow for len = trp->lp - trp->tp tokens, and the source tokens are
 * visited in a loop. For the token at ntrp->lp the text is duplicated by
 * newstring with one spare byte in front; a space is stored in that byte and
 * the text pointer is advanced past it.
 * NOTE(review): most of the loop body, the condition under which the text is
 * duplicated, the body of the final if and the return statement are missing
 * from this listing.
 */
356 normtokenrow(Tokenrow
* trp
)
359 Tokenrow
*ntrp
= new(Tokenrow
);
362 len
= trp
->lp
- trp
->tp
;
365 maketokenrow(len
, ntrp
);
366 for (tp
= trp
->tp
; tp
< trp
->lp
; tp
++)
/* Duplicate the text with one byte of room in front, then fill that byte with
 * a single space so that t again points at the token text. */
371 ntrp
->lp
->t
= newstring(tp
->t
, tp
->len
, 1);
372 *ntrp
->lp
->t
++ = ' ';
378 if (ntrp
->lp
> ntrp
->bp
)
/*
 * peektokens: dump the token row trp to stderr, prefixed by the label str.
 * Visible output: the label, a "(tp offset N)" note when tp lies outside
 * [trp->bp, trp->lp], then for at most the first 32 tokens the token text
 * followed by the token type in hex inside braces (with a '*' marking the
 * token at trp->tp), and finally a newline.
 * NOTE(review): the declaration and initial value of tp and some statements of
 * the loop body are missing from this listing.
 */
387 peektokens(Tokenrow
* trp
, char *str
)
394 fprintf(stderr
, "%s ", str
);
395 if (tp
< trp
->bp
|| tp
> trp
->lp
)
396 fprintf(stderr
, "(tp offset %ld) ", (long)(tp
- trp
->bp
));
397 for (tp
= trp
->bp
; tp
< trp
->lp
&& tp
< trp
->bp
+ 32; tp
++)
/* The byte just past the token text is saved here and restored after the
 * "%s" print; presumably a line missing from this listing overwrites it with
 * a terminator in between -- confirm in the full file. */
401 int c
= tp
->t
[tp
->len
];
404 fprintf(stderr
, "%s", tp
->t
);
405 tp
->t
[tp
->len
] = (uchar
) c
;
407 fprintf(stderr
, tp
== trp
->tp
? "{%x*} " : "{%x} ", tp
->type
);
409 fprintf(stderr
, "\n");
/*
 * puttokens: emit the tokens of trp into the output buffer wbuf.
 * For each token, len covers the leading whitespace plus the text and p points
 * at the start of that whitespace. The visible EBCDIC handling works as
 * follows: once the "__ToLatin1__" keyword has been seen, STRING and CCON
 * tokens are written through memcpy_EBCDIC, except for the token that directly
 * follows "extern". When wbp reaches &wbuf[OBS], any bytes beyond that point
 * are moved back to the start of wbuf.
 * NOTE(review): many lines of the function are missing from this listing
 * (declarations, braces, else branches, the ordinary copy into wbuf, the
 * write that presumably precedes the memcpy, updates of wbp), so the exact
 * nesting of the statements below cannot be confirmed here.
 */
414 puttokens(Tokenrow
* trp
)
423 for (; tp
< trp
->lp
; tp
++)
427 len
= tp
->len
+ tp
->wslen
;
428 p
= tp
->t
- tp
->wslen
;
430 /* EBCDIC to ANSI conversion requested? */
433 /* keyword __ToLatin1__ found? -> do conversion! */
434 if( EBCDIC_StartTokenDetected
)
436 /* previous token was 'extern'? -> don't convert current token! */
437 if( EBCDIC_ExternTokenDetected
)
439 EBCDIC_ExternTokenDetected
= 0;
444 /* current token is keyword 'extern'? -> don't convert following token! */
/* NOTE(review): strncmp is limited to len bytes, so a token that is a proper
 * prefix of "extern" (for example "ext") also compares equal; confirm whether
 * an exact-length match is intended. */
445 if( (tp
->wslen
== 0) && (strncmp( (char*)p
, "extern", len
) == 0) )
447 EBCDIC_ExternTokenDetected
= 1;
452 /* token is string or char? -> process EBCDIC to ANSI conversion */
/* The converted text is written at wbp and len is replaced by the value
 * memcpy_EBCDIC returns. */
453 if ((tp
->type
== STRING
) || (tp
->type
== CCON
))
454 len
= memcpy_EBCDIC(wbp
, p
, len
);
461 /* keyword __ToLatin1__ found? -> don't copy keyword and start conversion */
/* NOTE(review): the same prefix issue applies here: any NAME token that is a
 * prefix of "__ToLatin1__" compares equal. Also, p includes the leading
 * whitespace, so the keyword presumably matches only when wslen is 0. */
462 if( (tp
->type
== NAME
) && (strncmp( (char*)p
, "__ToLatin1__", len
) == 0) )
464 EBCDIC_StartTokenDetected
= 1;
478 if (wbp
>= &wbuf
[OBS
])
/* Carry the bytes written beyond wbuf[OBS] back to the start of the buffer.
 * NOTE(review): memcpy requires non-overlapping regions, which holds only
 * while the carried-over tail is at most OBS bytes long. */
481 if (wbp
> &wbuf
[OBS
])
482 memcpy(wbuf
, wbuf
+ OBS
, wbp
- &wbuf
[OBS
]);
487 if (cursource
->fd
== 0)
/* Writes the buffered bytes to file descriptor 1; the result of write is
 * ignored, so short writes and errors go unnoticed.
 * NOTE(review): the original line numbering jumps from 487 to 496, so this
 * statement may belong to a separate routine whose header is missing from
 * this listing. */
496 write(1, wbuf
, wbp
- wbuf
);
502 * turn a row into just a newline
/*
 * setempty: reduce the row trp to a single token.
 * The visible statement places the end pointer trp->lp one token past the
 * base trp->bp.
 * NOTE(review): the lines that reset trp->tp and store the content of the
 * remaining token are missing from this listing.
 */
505 setempty(Tokenrow
* trp
)
508 trp
->lp
= trp
->bp
+ 1;
/*
 * outnum: write the decimal digits of n into the buffer at p.
 * The function recurses on n / 10 first, so the more significant digits are
 * written before the last digit (n % 10 + '0') is stored and p is advanced.
 * No terminator is written by the visible code.
 * NOTE(review): the return type, the condition guarding the recursion and the
 * return statement are missing from this listing; the recursive call uses the
 * result as the updated write position. A negative n would make n % 10
 * negative and produce non-digit characters.
 */
516 outnum(char *p
, int n
)
519 p
= outnum(p
, n
/ 10);
520 *p
++ = (char) (n
% 10 + '0');
525 * allocate and initialize a new string from s, of length l, at offset o
/*
 * newstring: allocate a buffer and copy a string into it at an offset.
 *
 *   s  source bytes
 *   l  number of bytes to copy from s
 *   o  number of spare bytes to leave in front of the copy
 *
 * l + o + 1 bytes are obtained from domalloc and l bytes of s are copied to
 * offset o. strncpy returns its destination (ns + o); subtracting o gives the
 * start of the allocation, which is what the function returns. The first o
 * bytes are not written by the visible code; callers fill them in.
 * NOTE(review): strncpy does not append a terminator when s has no NUL within
 * its first l bytes; the extra byte in the allocation suggests that a line
 * missing from this listing stores the terminator -- confirm in the full file.
 */
529 newstring(uchar
* s
, int l
, int o
)
531 uchar
*ns
= (uchar
*) domalloc(l
+ o
+ 1);
534 return (uchar
*) strncpy((char *) ns
+ o
, (char *) s
, l
) - o
;