1 /* ccl - routines for character classes */
3 /* Copyright (c) 1990 The Regents of the University of California. */
4 /* All rights reserved. */
6 /* This code is derived from software contributed to Berkeley by */
9 /* The United States Government has rights in this work pursuant */
10 /* to contract no. DE-AC03-76SF00098 between the United States */
11 /* Department of Energy and the University of California. */
13 /* This file is part of flex. */
15 /* Redistribution and use in source and binary forms, with or without */
16 /* modification, are permitted provided that the following conditions */
19 /* 1. Redistributions of source code must retain the above copyright */
20 /* notice, this list of conditions and the following disclaimer. */
21 /* 2. Redistributions in binary form must reproduce the above copyright */
22 /* notice, this list of conditions and the following disclaimer in the */
23 /* documentation and/or other materials provided with the distribution. */
25 /* Neither the name of the University nor the names of its contributors */
26 /* may be used to endorse or promote products derived from this software */
27 /* without specific prior written permission. */
29 /* THIS SOFTWARE IS PROVIDED ``AS IS'' AND WITHOUT ANY EXPRESS OR */
30 /* IMPLIED WARRANTIES, INCLUDING, WITHOUT LIMITATION, THE IMPLIED */
31 /* WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR */
34 __RCSID("$NetBSD: ccl.c,v 1.3 2014/10/30 18:44:05 christos Exp $");
37 /* return true if the chr is in the ccl. Takes negation into account. */
39 ccl_contains (const int cclp
, const int ch
)
46 for (i
= 0; i
< len
; ++i
)
47 if (ccltbl
[ind
+ i
] == ch
)
54 /* ccladd - add a single character to a ccl */
56 void ccladd (cclp
, ch
)
60 int ind
, len
, newpos
, i
;
67 /* check to see if the character is already in the ccl */
69 for (i
= 0; i
< len
; ++i
)
70 if (ccltbl
[ind
+ i
] == ch
)
75 ccl_has_nl
[cclp
] = true;
79 if (newpos
>= current_max_ccl_tbl_size
) {
80 current_max_ccl_tbl_size
+= MAX_CCL_TBL_SIZE_INCREMENT
;
84 ccltbl
= reallocate_Character_array (ccltbl
,
85 current_max_ccl_tbl_size
);
88 ccllen
[cclp
] = len
+ 1;
92 /* dump_cclp - same thing as list_character_set, but for cclps. */
94 static void dump_cclp (FILE* file
, int cclp
)
100 for (i
= 0; i
< csize
; ++i
) {
101 if (ccl_contains(cclp
, i
)){
102 register int start_char
= i
;
106 fputs (readable_form (i
), file
);
108 while (++i
< csize
&& ccl_contains(cclp
,i
)) ;
110 if (i
- 1 > start_char
)
112 fprintf (file
, "-%s",
113 readable_form (i
- 1));
124 /* ccl_set_diff - create a new ccl as the set difference of the two given ccls. */
126 ccl_set_diff (int a
, int b
)
130 /* create new class */
133 /* In order to handle negation, we spin through all possible chars,
134 * addding each char in a that is not in b.
135 * (This could be O(n^2), but n is small and bounded.)
137 for ( ch
= 0; ch
< csize
; ++ch
)
138 if (ccl_contains (a
, ch
) && !ccl_contains(b
, ch
))
143 fprintf(stderr
, "ccl_set_diff (");
144 fprintf(stderr
, "\n ");
145 dump_cclp (stderr
, a
);
146 fprintf(stderr
, "\n ");
147 dump_cclp (stderr
, b
);
148 fprintf(stderr
, "\n ");
149 dump_cclp (stderr
, d
);
150 fprintf(stderr
, "\n)\n");
155 /* ccl_set_union - create a new ccl as the set union of the two given ccls. */
157 ccl_set_union (int a
, int b
)
161 /* create new class */
165 for (i
= 0; i
< ccllen
[a
]; ++i
)
166 ccladd (d
, ccltbl
[cclmap
[a
] + i
]);
169 for (i
= 0; i
< ccllen
[b
]; ++i
)
170 ccladd (d
, ccltbl
[cclmap
[b
] + i
]);
174 fprintf(stderr
, "ccl_set_union (%d + %d = %d", a
, b
, d
);
175 fprintf(stderr
, "\n ");
176 dump_cclp (stderr
, a
);
177 fprintf(stderr
, "\n ");
178 dump_cclp (stderr
, b
);
179 fprintf(stderr
, "\n ");
180 dump_cclp (stderr
, d
);
181 fprintf(stderr
, "\n)\n");
187 /* cclinit - return an empty ccl */
191 if (++lastccl
>= current_maxccls
) {
192 current_maxccls
+= MAX_CCLS_INCREMENT
;
197 reallocate_integer_array (cclmap
, current_maxccls
);
199 reallocate_integer_array (ccllen
, current_maxccls
);
200 cclng
= reallocate_integer_array (cclng
, current_maxccls
);
202 reallocate_bool_array (ccl_has_nl
,
207 /* we're making the first ccl */
211 /* The new pointer is just past the end of the last ccl.
212 * Since the cclmap points to the \first/ character of a
213 * ccl, adding the length of the ccl to the cclmap pointer
214 * will produce a cursor to the first free space.
217 cclmap
[lastccl
- 1] + ccllen
[lastccl
- 1];
220 cclng
[lastccl
] = 0; /* ccl's start out life un-negated */
221 ccl_has_nl
[lastccl
] = false;
227 /* cclnegate - negate the given ccl */
229 void cclnegate (cclp
)
233 ccl_has_nl
[cclp
] = !ccl_has_nl
[cclp
];
237 /* list_character_set - list the members of a set of characters in CCL form
239 * Writes to the given file a character-class representation of those
240 * characters present in the given CCL. A character is present if it
241 * has a non-zero value in the cset array.
244 void list_character_set (file
, cset
)
252 for (i
= 0; i
< csize
; ++i
) {
254 register int start_char
= i
;
258 fputs (readable_form (i
), file
);
260 while (++i
< csize
&& cset
[i
]) ;
262 if (i
- 1 > start_char
)
264 fprintf (file
, "-%s",
265 readable_form (i
- 1));
274 /** Determines if the range [c1-c2] is unambiguous in a case-insensitive
275 * scanner. Specifically, if a lowercase or uppercase character, x, is in the
276 * range [c1-c2], then we require that UPPERCASE(x) and LOWERCASE(x) must also
277 * be in the range. If not, then this range is ambiguous, and the function
278 * returns false. For example, [@-_] spans [a-z] but not [A-Z]. Beware that
279 * [a-z] will be labeled ambiguous because it does not include [A-Z].
281 * @param c1 the lower end of the range
282 * @param c2 the upper end of the range
283 * @return true if [c1-c2] is not ambiguous for a caseless scanner.
285 bool range_covers_case (int c1
, int c2
)
289 for (i
= c1
; i
<= c2
; i
++) {
291 o
= reverse_case (i
);
292 if (o
< c1
|| c2
< o
)
299 /** Reverse the case of a character, if possible.
300 * @return c if case-reversal does not apply.
302 int reverse_case (int c
)
304 return isupper (c
) ? tolower (c
) : (islower (c
) ? toupper (c
) : c
);
307 /** Return true if c is uppercase or lowercase. */
308 bool has_case (int c
)
310 return (isupper (c
) || islower (c
)) ? true : false;