4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License (the "License").
6 * You may not use this file except in compliance with the License.
8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 * or http://www.opensolaris.org/os/licensing.
10 * See the License for the specific language governing permissions
11 * and limitations under the License.
13 * When distributing Covered Code, include this CDDL HEADER in each
14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
23 * Copyright 2008 Sun Microsystems, Inc. All rights reserved.
24 * Use is subject to license terms.
27 /* Copyright (c) 1984, 1986, 1987, 1988, 1989 AT&T */
28 /* All Rights Reserved */
30 #pragma ident "%Z%%M% %I% %E% SMI"
35 * regex() WORKS **ONLY** WITH THE ASCII AND THE Solaris EUC CHARACTER SETS.
36 * IT IS **NOT** CHARACTER SET INDEPENDENT.
40 #pragma weak _regex = regex
43 /* CONSTANTS SHARED WITH regcmp() */
54 /* PRIVATE CONSTANTS */
/*
 * Values 0x1-0x3 are named for adding 256, 512 or 768 to a group length;
 * ADDED_LENGTH_BITS (0x3) is the two-bit mask that spans all three.
 * NOTE(review): the code that reads these bits is not in view -- confirm.
 */
56 #define ADD_256_TO_GROUP_LENGTH 0x1
57 #define ADD_512_TO_GROUP_LENGTH 0x2
58 #define ADD_768_TO_GROUP_LENGTH 0x3
59 #define ADDED_LENGTH_BITS 0x3
/* get_match_counts() ANDs each count byte with this to keep the low 8 bits */
60 #define SINGLE_BYTE_MASK 0xff
/* number of entries in stringp_stack[] */
61 #define STRINGP_STACK_SIZE 50
64 /* PRIVATE TYPE DEFINITIONS */
69 } char_test_condition_t
;
79 /* PRIVATE GLOBAL VARIABLES */
/* taken by regex() before it touches the shared state below, released before it returns */
81 static mutex_t regex_lock
= DEFAULTMUTEX
;
/*
 * per-substring index into the return_argp[] array of regex(); regex()
 * resets every entry to -1 and copies a substring out only when its
 * entry is >= 0
 */
82 static int return_arg_number
[NSUBSTRINGS
];
/* exclusive end of each saved substring; regex() copies up to this pointer */
83 static const char *substring_endp
[NSUBSTRINGS
];
/* start of each saved substring; regex() resets every entry to NULL */
84 static const char *substring_startp
[NSUBSTRINGS
];
/*
 * stack of saved string pointers and its stack pointer; regex() starts
 * stringp_stackp at &stringp_stack[STRINGP_STACK_SIZE] and push_stringp()
 * tests it against &stringp_stack[0]
 */
85 static const char *stringp_stack
[STRINGP_STACK_SIZE
];
86 static const char **stringp_stackp
;
89 /* DECLARATIONS OF PRIVATE FUNCTIONS */
92 get_wchar(wchar_t *wcharp
,
96 get_match_counts(int *nmust_matchp
,
97 int *nextra_matches_allowedp
,
98 const char *count_stringp
);
101 in_wchar_range(wchar_t test_char
,
109 previous_charp(const char *current_charp
);
112 push_stringp(const char *stringp
);
114 static char_test_result_t
115 test_char_against_ascii_class(char test_char
,
117 char_test_condition_t test_condition
);
119 static char_test_result_t
120 test_char_against_multibyte_class(wchar_t test_char
,
122 char_test_condition_t test_condition
);
125 /* FOR COMPATIBILITY WITH PREVIOUS ASCII VERSIONS OF regcmp() */
127 static char_test_result_t
128 test_char_against_old_ascii_class(char test_char
,
130 char_test_condition_t test_condition
);
133 test_repeated_ascii_char(const char *repeat_startp
,
138 test_repeated_multibyte_char(const char *repeat_startp
,
143 test_repeated_group(const char *repeat_startp
,
148 test_string(const char *stringp
,
152 /* DEFINITIONS OF PUBLIC VARIABLES */
157 * reserve thread-specific storage for __loc1
/*
 * tsdalloc() is asked for sizeof (char *) bytes under the key
 * _T_REGEX_LOC1 and its result is returned as a char **.
 * NOTE(review): the function header is not visible in this view.
 */
164 return ((char **)tsdalloc(_T_REGEX_LOC1
, sizeof (char *), NULL
));
/* every use of __loc1 calls ____loc1() and dereferences the result */
167 #define __loc1 (*(____loc1()))
169 /* DEFINITION OF regex() */
/*
 * regex(): searches the string addressed by stringp for a substring that
 * matches the compiled regular expression addressed by regexp.
 *
 * Returns end_of_matchp cast to char *: the value test_string() produced
 * for the successful attempt, or NULL when no attempt succeeded.  __loc1
 * records the string position at which the match starts (for an anchored
 * expression it is assigned before the single attempt is made).
 *
 * The variable arguments are read as NSUBSTRINGS char * buffers.  After the
 * search, every saved substring whose return_arg_number[] entry is >= 0 is
 * copied, NUL-terminated, into the corresponding non-NULL buffer.
 *
 * regex_lock is taken after the __loc1 storage check and released both on
 * the NULL-argument path and before the final return.
 */
172 regex(const char *regexp
, const char *stringp
, ...)
176 const char *end_of_matchp
;
178 char *return_argp
[NSUBSTRINGS
];
179 char *returned_substringp
;
181 const char *substringp
;
182 wchar_t string_wchar
;
/* no thread-specific storage for __loc1 is available */
184 if (____loc1() == (char **)0) {
187 lmutex_lock(&regex_lock
);
/* reject missing arguments; the lock must be dropped on this path too */
191 if ((stringp
== NULL
) || (regexp
== NULL
)) {
192 lmutex_unlock(&regex_lock
);
197 /* INITIALIZE SUBSTRINGS THAT MIGHT BE RETURNED IN VARARGS */
200 va_start(arg_listp
, stringp
);
201 while (substringn
< NSUBSTRINGS
) {
202 return_argp
[substringn
] = va_arg(arg_listp
, char *);
203 substring_startp
[substringn
] = NULL
;
204 return_arg_number
[substringn
] = -1;
210 /* TEST THE STRING AGAINST THE REGULAR EXPRESSION */
212 end_of_matchp
= NULL
;
/* start with the string-pointer stack pointer one past the last slot */
213 stringp_stackp
= &stringp_stack
[STRINGP_STACK_SIZE
];
215 if ((int)*regexp
== (int)START_OF_STRING_MARK
) {
218 * the match must start at the beginning of the string
221 __loc1
= (char *)stringp
;
223 end_of_matchp
= test_string(stringp
, regexp
);
225 } else if ((int)*regexp
== (int)ASCII_CHAR
) {
228 * test a string against a regular expression
229 * that starts with a single ASCII character:
231 * move to each character in the string that matches
232 * the first character in the regular expression
233 * and test the remaining string
/* regexp + 1 addresses the literal byte that follows the ASCII_CHAR marker */
236 while ((*stringp
!= *(regexp
+ 1)) && (*stringp
!= '\0')) {
239 while ((end_of_matchp
== NULL
) && (*stringp
!= '\0')) {
240 end_of_matchp
= test_string(stringp
, regexp
);
241 if (end_of_matchp
!= NULL
) {
242 __loc1
= (char *)stringp
;
245 while ((*stringp
!= *(regexp
+ 1)) && (*stringp
!= '\0')) {
251 } else if (!multibyte
) {
254 * if the value of the "multibyte" macro defined in <euc.h>
255 * is false, regex() is running in an ASCII locale;
256 * test an ASCII string against an ASCII regular expression
257 * that doesn't start with a single ASCII character:
259 * move forward in the string one byte at a time, testing
260 * the remaining string against the regular expression
263 end_of_matchp
= test_string(stringp
, regexp
);
264 while ((end_of_matchp
== NULL
) && (*stringp
!= '\0')) {
266 end_of_matchp
= test_string(stringp
, regexp
);
268 if (end_of_matchp
!= NULL
) {
269 __loc1
= (char *)stringp
;
272 } else if ((int)*regexp
== (int)MULTIBYTE_CHAR
) {
275 * test a multibyte string against a multibyte regular expression
276 * that starts with a single multibyte character:
278 * move to each character in the string that matches
279 * the first character in the regular expression
280 * and test the remaining string
/* decode the literal character that follows the MULTIBYTE_CHAR marker */
283 (void) get_wchar(&regex_wchar
, regexp
+ 1);
284 char_size
= get_wchar(&string_wchar
, stringp
);
/* char_size <= 0 ends the scan: no further character could be read */
285 while ((string_wchar
!= regex_wchar
) && (char_size
> 0)) {
286 stringp
+= char_size
;
287 char_size
= get_wchar(&string_wchar
, stringp
);
289 while ((end_of_matchp
== NULL
) && (char_size
> 0)) {
290 end_of_matchp
= test_string(stringp
, regexp
);
291 if (end_of_matchp
!= NULL
) {
292 __loc1
= (char *)stringp
;
294 stringp
+= char_size
;
295 char_size
= get_wchar(&string_wchar
, stringp
);
296 while ((string_wchar
!= regex_wchar
) && (char_size
> 0)) {
297 stringp
+= char_size
;
298 char_size
= get_wchar(&string_wchar
, stringp
);
306 * test a multibyte string against a multibyte regular expression
307 * that doesn't start with a single multibyte character
309 * move forward in the string one multibyte character at a time,
310 * testing the remaining string against the regular expression
313 end_of_matchp
= test_string(stringp
, regexp
);
314 char_size
= get_wchar(&string_wchar
, stringp
);
315 while ((end_of_matchp
== NULL
) && (char_size
> 0)) {
316 stringp
+= char_size
;
317 end_of_matchp
= test_string(stringp
, regexp
);
318 char_size
= get_wchar(&string_wchar
, stringp
);
320 if (end_of_matchp
!= NULL
) {
321 __loc1
= (char *)stringp
;
326 * Return substrings that matched subexpressions for which
327 * matching substrings are to be returned.
331 * According to manual page regcmp(3G), regex() returns substrings
332 * that match subexpressions even when no substring matches the
333 * entire regular expression.
337 while (substringn
< NSUBSTRINGS
) {
338 substringp
= substring_startp
[substringn
];
/* copy only substrings that were started and were given a return slot */
339 if ((substringp
!= NULL
) &&
340 (return_arg_number
[substringn
] >= 0)) {
341 returned_substringp
=
342 return_argp
[return_arg_number
[substringn
]];
343 if (returned_substringp
!= NULL
) {
344 while (substringp
< substring_endp
[substringn
]) {
345 *returned_substringp
= (char)*substringp
;
346 returned_substringp
++;
/* terminate the copied substring */
349 *returned_substringp
= '\0';
354 lmutex_unlock(&regex_lock
);
355 return ((char *)end_of_matchp
);
359 /* DEFINITIONS OF PRIVATE FUNCTIONS */
/*
 * get_wchar(): stores in *wcharp the wide character found at stringp.
 *
 * A NULL stringp stores the NUL character.  A terminating NUL and any byte
 * <= 0x7f are stored directly, without calling mbtowc().  Any other lead
 * byte is decoded by mbtowc() over at most MB_LEN_MAX bytes and the result
 * of that call is kept in char_size.
 * NOTE(review): the char_size values for the first three branches and the
 * return statement are not visible here; callers in this file treat a
 * result > 0 as meaning a character was read -- confirm.
 */
362 get_wchar(wchar_t *wcharp
,
367 if (stringp
== NULL
) {
369 *wcharp
= (wchar_t)((unsigned int)'\0');
370 } else if (*stringp
== '\0') {
372 *wcharp
= (wchar_t)((unsigned int)*stringp
);
/* single-byte (7-bit) character: no multibyte decoding needed */
373 } else if ((unsigned char)*stringp
<= (unsigned char)0x7f) {
375 *wcharp
= (wchar_t)((unsigned int)*stringp
);
/* high bit set: let mbtowc() decode the multibyte sequence */
377 char_size
= mbtowc(wcharp
, stringp
, MB_LEN_MAX
);
/*
 * get_match_counts(): decodes the repeat counts stored at count_stringp.
 *
 * *nmust_matchp receives the minimum count.  *nextra_matches_allowedp
 * receives UNLIMITED when the maximum count equals UNLIMITED, otherwise
 * the maximum minus the minimum.  Each count is one byte, masked with
 * SINGLE_BYTE_MASK so it is read as an unsigned value.
 * NOTE(review): the statement that advances count_stringp between the two
 * reads is not visible in this view -- confirm.
 */
383 get_match_counts(int *nmust_matchp
,
384 int *nextra_matches_allowedp
,
385 const char *count_stringp
)
387 int minimum_match_count
;
388 int maximum_match_count
;
390 minimum_match_count
=
391 (int)((unsigned int)*count_stringp
& SINGLE_BYTE_MASK
);
392 *nmust_matchp
= minimum_match_count
;
395 maximum_match_count
=
396 (int)((unsigned int)*count_stringp
& SINGLE_BYTE_MASK
);
397 if (maximum_match_count
== (int)UNLIMITED
) {
398 *nextra_matches_allowedp
= (int)UNLIMITED
;
/* bounded repeat: matches permitted beyond the required minimum */
400 *nextra_matches_allowedp
=
401 maximum_match_count
- minimum_match_count
;
405 } /* get_match_counts() */
/*
 * in_wchar_range(): true when lower_char <= test_char <= upper_char and
 * the range is one this routine accepts:
 *   - both bounds are <= 0x7f, or
 *   - test_char has the same WCHAR_CSMASK bits as both bounds.
 * NOTE(review): the declarations of lower_char and upper_char are not
 * visible in this view.
 */
408 in_wchar_range(wchar_t test_char
,
412 return (((lower_char
<= 0x7f) && (upper_char
<= 0x7f) &&
413 (lower_char
<= test_char
) && (test_char
<= upper_char
)) ||
414 (((test_char
& WCHAR_CSMASK
) == (lower_char
& WCHAR_CSMASK
)) &&
415 ((test_char
& WCHAR_CSMASK
) == (upper_char
& WCHAR_CSMASK
)) &&
416 (lower_char
<= test_char
) && (test_char
<= upper_char
)));
418 } /* in_wchar_range() */
/*
 * Body fragment that reads the entry addressed by stringp_stackp.
 * NOTE(review): the function header is not visible here; presumably this
 * is pop_stringp(), which test_repeated_group() calls -- confirm.
 */
/* stack pointer at or beyond one-past-the-end: nothing is stored there */
425 if (stringp_stackp
>= &stringp_stack
[STRINGP_STACK_SIZE
]) {
428 stringp
= *stringp_stackp
;
/*
 * previous_charp(): chooses the start of the character that precedes
 * current_charp from four candidate positions, tested in this order:
 *   1. the byte at current_charp - 1 is <= 0x7f        -> that byte
 *   2. the byte at current_charp - eucw2 - 1 is SS2    -> that position
 *   3. the byte at current_charp - eucw3 - 1 is SS3    -> that position
 *   4. otherwise                                       -> current_charp - eucw1
 * NOTE(review): the statement that returns prev_charp is not visible in
 * this view.
 */
436 previous_charp(const char *current_charp
)
439 * returns the pointer to the previous character in
440 * a string of multibyte characters
443 const char *prev_cs0
= current_charp
- 1;
444 const char *prev_cs1
= current_charp
- eucw1
;
/* the extra byte stepped over here is where SS2 / SS3 is looked for below */
445 const char *prev_cs2
= current_charp
- eucw2
- 1;
446 const char *prev_cs3
= current_charp
- eucw3
- 1;
447 const char *prev_charp
;
449 if ((unsigned char)*prev_cs0
<= 0x7f) {
450 prev_charp
= prev_cs0
;
451 } else if ((unsigned char)*prev_cs2
== SS2
) {
452 prev_charp
= prev_cs2
;
453 } else if ((unsigned char)*prev_cs3
== SS3
) {
454 prev_charp
= prev_cs3
;
456 prev_charp
= prev_cs1
;
460 } /* previous_charp() */
/*
 * push_stringp(): stores stringp in the entry addressed by stringp_stackp.
 * NOTE(review): the body of the bounds check and any adjustment of
 * stringp_stackp are not visible in this view.
 */
463 push_stringp(const char *stringp
)
/* stack pointer has reached the first slot of stringp_stack[] */
465 if (stringp_stackp
<= &stringp_stack
[0]) {
469 *stringp_stackp
= stringp
;
/*
 * test_char_against_ascii_class(): walks the bytes of a compiled ASCII
 * character class and reports whether test_char satisfies test_condition.
 *
 * A hit (test_char equals a class byte, or lies between the bytes on either
 * side of a THRU marker) returns CONDITION_TRUE for IN_CLASS and
 * CONDITION_FALSE otherwise.  When the loop ends without a hit the result
 * is CONDITION_TRUE only for NOT_IN_CLASS.
 * NOTE(review): the classp parameter declaration and the statements that
 * advance classp are not visible in this view.
 */
475 static char_test_result_t
476 test_char_against_ascii_class(char test_char
,
478 char_test_condition_t test_condition
)
481 * tests a character for membership in an ASCII character class compiled
482 * by the internationalized version of regcmp();
484 * NOTE: The internationalized version of regcmp() compiles
485 * the range a-z in an ASCII character class to aTHRUz.
/* the byte at classp supplies the initial number of bytes to check */
490 nbytes_to_check
= (int)*classp
;
494 while (nbytes_to_check
> 0) {
495 if (test_char
== *classp
) {
496 if (test_condition
== IN_CLASS
)
497 return (CONDITION_TRUE
);
499 return (CONDITION_FALSE
);
500 } else if (*classp
== THRU
) {
/* range bounds are the bytes just before and just after the THRU marker */
501 if ((*(classp
- 1) <= test_char
) &&
502 (test_char
<= *(classp
+ 1))) {
503 if (test_condition
== IN_CLASS
)
504 return (CONDITION_TRUE
);
506 return (CONDITION_FALSE
);
509 nbytes_to_check
-= 2;
/* no class member matched */
516 if (test_condition
== NOT_IN_CLASS
) {
517 return (CONDITION_TRUE
);
519 return (CONDITION_FALSE
);
521 } /* test_char_against_ascii_class() */
/*
 * test_char_against_multibyte_class(): walks the characters of a compiled
 * multibyte character class and reports whether test_char satisfies
 * test_condition.
 *
 * Class characters are decoded one at a time with get_wchar(); a decode
 * result <= 0 returns CHAR_TEST_ERROR.  A hit (test_char equals a class
 * character, or in_wchar_range() accepts it for the characters around a
 * THRU marker) returns CONDITION_TRUE for IN_CLASS and CONDITION_FALSE
 * otherwise.  When the loop ends without a hit the result is
 * CONDITION_TRUE only for NOT_IN_CLASS.
 * NOTE(review): the classp parameter declaration, the statements that
 * advance classp and the last argument of the in_wchar_range() call are
 * not visible in this view.
 */
523 static char_test_result_t
524 test_char_against_multibyte_class(wchar_t test_char
,
526 char_test_condition_t test_condition
)
529 * tests a character for membership in a multibyte character class;
531 * NOTE: The range a-z in a multibyte character class compiles to
536 wchar_t current_char
;
538 wchar_t previous_char
;
/* the byte at classp supplies the initial number of bytes to check */
540 nbytes_to_check
= (int)*classp
;
/* the first class character is tested before the loop starts */
544 char_size
= get_wchar(&current_char
, classp
);
545 if (char_size
<= 0) {
546 return (CHAR_TEST_ERROR
);
547 } else if (test_char
== current_char
) {
548 if (test_condition
== IN_CLASS
) {
549 return (CONDITION_TRUE
);
551 return (CONDITION_FALSE
);
555 nbytes_to_check
-= char_size
;
558 while (nbytes_to_check
> 0) {
/* remember the previous character: it is the lower bound if THRU follows */
559 previous_char
= current_char
;
560 char_size
= get_wchar(&current_char
, classp
);
561 if (char_size
<= 0) {
562 return (CHAR_TEST_ERROR
);
563 } else if (test_char
== current_char
) {
564 if (test_condition
== IN_CLASS
) {
565 return (CONDITION_TRUE
);
567 return (CONDITION_FALSE
);
569 } else if (current_char
== THRU
) {
/* account for the THRU marker, then decode the character after it */
571 nbytes_to_check
-= char_size
;
572 char_size
= get_wchar(&current_char
, classp
);
573 if (char_size
<= 0) {
574 return (CHAR_TEST_ERROR
);
575 } else if (in_wchar_range(test_char
, previous_char
,
577 if (test_condition
== IN_CLASS
) {
578 return (CONDITION_TRUE
);
580 return (CONDITION_FALSE
);
584 nbytes_to_check
-= char_size
;
588 nbytes_to_check
-= char_size
;
/* no class member matched */
591 if (test_condition
== NOT_IN_CLASS
) {
592 return (CONDITION_TRUE
);
594 return (CONDITION_FALSE
);
596 } /* test_char_against_multibyte_class() */
599 /* FOR COMPATIBILITY WITH PREVIOUS ASCII VERSIONS OF regcmp() */
/*
 * test_char_against_old_ascii_class(): same contract as the other class
 * testers, for classes laid out with the THRU marker ahead of its bounds.
 *
 * A hit (test_char equals a class byte, or lies between the two bytes that
 * follow a THRU marker) returns CONDITION_TRUE for IN_CLASS and
 * CONDITION_FALSE otherwise.  When the loop ends without a hit the result
 * is CONDITION_TRUE only for NOT_IN_CLASS.
 * NOTE(review): the classp parameter declaration and the statements that
 * advance classp are not visible in this view.
 */
601 static char_test_result_t
602 test_char_against_old_ascii_class(char test_char
,
604 char_test_condition_t test_condition
)
607 * tests a character for membership in an ASCII character class compiled
608 * by the ASCII version of regcmp();
610 * NOTE: ASCII versions of regcmp() compile the range a-z in an
611 * ASCII character class to THRUaz. The internationalized
612 * version compiles the same range to aTHRUz.
/* the byte at classp supplies the initial number of bytes to check */
617 nbytes_to_check
= (int)*classp
;
621 while (nbytes_to_check
> 0) {
622 if (test_char
== *classp
) {
623 if (test_condition
== IN_CLASS
) {
624 return (CONDITION_TRUE
);
626 return (CONDITION_FALSE
);
628 } else if (*classp
== THRU
) {
/* range bounds are the two bytes that follow the THRU marker */
629 if ((*(classp
+ 1) <= test_char
) &&
630 (test_char
<= *(classp
+ 2))) {
631 if (test_condition
== IN_CLASS
) {
632 return (CONDITION_TRUE
);
634 return (CONDITION_FALSE
);
/* three bytes accounted for: the marker and both bounds */
638 nbytes_to_check
-= 3;
/* no class member matched */
645 if (test_condition
== NOT_IN_CLASS
) {
646 return (CONDITION_TRUE
);
648 return (CONDITION_FALSE
);
650 } /* test_char_against_old_ascii_class() */
/*
 * test_repeated_ascii_char(): tries test_string() at stringp and keeps
 * retrying while no match has been found and stringp is still beyond
 * repeat_startp.  Returns the first non-NULL test_string() result, or
 * NULL if none is found.
 * NOTE(review): the stringp and regexp parameter declarations and the
 * statement that moves stringp back between attempts are not visible in
 * this view.
 */
653 test_repeated_ascii_char(const char *repeat_startp
,
657 const char *end_of_matchp
;
659 end_of_matchp
= test_string(stringp
, regexp
);
660 while ((end_of_matchp
== NULL
) &&
661 (stringp
> repeat_startp
)) {
663 end_of_matchp
= test_string(stringp
, regexp
);
665 return (end_of_matchp
);
/*
 * test_repeated_multibyte_char(): tries test_string() at stringp; while
 * no match has been found and stringp is still beyond repeat_startp, it
 * steps stringp back one character with previous_charp() and tries again.
 * Returns the first non-NULL test_string() result, or NULL if none is
 * found.
 * NOTE(review): the stringp and regexp parameter declarations are not
 * visible in this view.
 */
669 test_repeated_multibyte_char(const char *repeat_startp
,
673 const char *end_of_matchp
;
675 end_of_matchp
= test_string(stringp
, regexp
);
676 while ((end_of_matchp
== NULL
) &&
677 (stringp
> repeat_startp
)) {
678 stringp
= previous_charp(stringp
);
679 end_of_matchp
= test_string(stringp
, regexp
);
681 return (end_of_matchp
);
/*
 * test_repeated_group(): tries test_string() at stringp; while no match
 * has been found and stringp is still beyond repeat_startp, it replaces
 * stringp with the value returned by pop_stringp() and tries again.
 * Returns the first non-NULL test_string() result, or NULL if none is
 * found.
 * NOTE(review): the stringp and regexp parameter declarations and the
 * body of the NULL check on the popped pointer are not visible in this
 * view.
 */
685 test_repeated_group(const char *repeat_startp
,
689 const char *end_of_matchp
;
691 end_of_matchp
= test_string(stringp
, regexp
);
692 while ((end_of_matchp
== NULL
) &&
693 (stringp
> repeat_startp
)) {
694 stringp
= pop_stringp();
695 if (stringp
== NULL
) {
698 end_of_matchp
= test_string(stringp
, regexp
);
700 return (end_of_matchp
);
704 test_string(const char *stringp
,
708 * returns a pointer to the first character following the first
709 * substring of the string addressed by stringp that matches
710 * the compiled regular expression addressed by regexp
713 unsigned int group_length
;
714 int nextra_matches_allowed
;
718 const char *repeat_startp
;
719 unsigned int return_argn
;
720 wchar_t string_wchar
;
721 int string_char_size
;
722 unsigned int substringn
;
723 char_test_condition_t test_condition
;
724 const char *test_stringp
;
729 * Exit the loop via a return whenever there's a match
730 * or it's clear that there can be no match.
733 switch ((int)*regexp
) {
737 * Each case ends with either a return or with stringp
738 * addressing the next character to be tested and regexp
739 * addressing the next compiled regular expression
741 * NOTE: The comments for each case give the meaning
742 * of the compiled regular expression decoded by the case
743 * and the character string that the compiled regular
744 * expression uses to encode the case. Each single
745 * character encoded in the compiled regular expression
746 * is shown enclosed in angle brackets (<>). Each
747 * compiled regular expression begins with a marker
748 * character which is shown as a named constant
749 * (e.g. <ASCII_CHAR>). Character constants are shown
750 * enclosed in single quotes (e.g. <'$'>). All other
751 * single characters encoded in the compiled regular
752 * expression are shown as lower case variable names
753 * (e.g. <ascii_char> or <multibyte_char>). Multicharacter
754 * strings encoded in the compiled regular expression
755 * are shown as variable names followed by ellipses
756 * (e.g. <compiled_regex...>).
759 case ASCII_CHAR
: /* single ASCII char */
761 /* encoded as <ASCII_CHAR><ascii_char> */
764 if (*regexp
== *stringp
) {
770 break; /* end case ASCII_CHAR */
772 case MULTIBYTE_CHAR
: /* single multibyte char */
774 /* encoded as <MULTIBYTE_CHAR><multibyte_char> */
777 regex_char_size
= get_wchar(®ex_wchar
, regexp
);
778 string_char_size
= get_wchar(&string_wchar
, stringp
);
779 if ((string_char_size
<= 0) || (string_wchar
!= regex_wchar
)) {
782 regexp
+= regex_char_size
;
783 stringp
+= string_char_size
;
785 break; /* end case MULTIBYTE_CHAR */
787 case ANY_CHAR
: /* any single ASCII or multibyte char */
789 /* encoded as <ANY_CHAR> */
792 if (*stringp
== '\0') {
799 string_char_size
= get_wchar(&string_wchar
, stringp
);
800 if (string_char_size
<= 0) {
804 stringp
+= string_char_size
;
807 break; /* end case ANY_CHAR */
809 case IN_ASCII_CHAR_CLASS
: /* [.....] */
810 case NOT_IN_ASCII_CHAR_CLASS
:
813 * encoded as <IN_ASCII_CHAR_CLASS><class_length><class...>
814 * or <NOT_IN_ASCII_CHAR_CLASS><class_length><class...>
816 * NOTE: <class_length> includes the <class_length> byte
819 if ((int)*regexp
== (int)IN_ASCII_CHAR_CLASS
) {
820 test_condition
= IN_CLASS
;
822 test_condition
= NOT_IN_CLASS
;
824 regexp
++; /* point to the <class_length> byte */
826 if ((*stringp
!= '\0') &&
827 (test_char_against_ascii_class(*stringp
, regexp
,
828 test_condition
) == CONDITION_TRUE
)) {
829 regexp
+= (int)*regexp
; /* add the class length to regexp */
834 break; /* end case IN_ASCII_CHAR_CLASS */
836 case IN_MULTIBYTE_CHAR_CLASS
: /* [....] */
837 case NOT_IN_MULTIBYTE_CHAR_CLASS
:
840 * encoded as <IN_MULTIBYTE_CHAR_CLASS><class_length><class...>
841 * or <NOT_IN_MULTIBYTE_CHAR_CLASS><class_length><class...>
843 * NOTE: <class_length> includes the <class_length> byte
846 if ((int)*regexp
== (int)IN_MULTIBYTE_CHAR_CLASS
) {
847 test_condition
= IN_CLASS
;
849 test_condition
= NOT_IN_CLASS
;
851 regexp
++; /* point to the <class_length> byte */
853 string_char_size
= get_wchar(&string_wchar
, stringp
);
854 if ((string_char_size
> 0) &&
855 (test_char_against_multibyte_class(string_wchar
, regexp
,
856 test_condition
) == CONDITION_TRUE
)) {
857 regexp
+= (int)*regexp
; /* add the class length to regexp */
858 stringp
+= string_char_size
;
862 break; /* end case IN_MULTIBYTE_CHAR_CLASS */
864 case IN_OLD_ASCII_CHAR_CLASS
: /* [...] */
865 case NOT_IN_OLD_ASCII_CHAR_CLASS
:
868 * encoded as <IN_OLD_ASCII_CHAR_CLASS><class_length><class...>
869 * or <NOT_IN_OLD_ASCII_CHAR_CLASS><class_length><class...>
871 * NOTE: <class_length> includes the <class_length> byte
874 if ((int)*regexp
== (int)IN_OLD_ASCII_CHAR_CLASS
) {
875 test_condition
= IN_CLASS
;
877 test_condition
= NOT_IN_CLASS
;
879 regexp
++; /* point to the <class_length> byte */
881 if ((*stringp
!= '\0') &&
882 (test_char_against_old_ascii_class(*stringp
, regexp
,
883 test_condition
) == CONDITION_TRUE
)) {
884 regexp
+= (int)*regexp
; /* add the class length to regexp */
889 break; /* end case [NOT_]IN_OLD_ASCII_CHAR_CLASS */
891 case SIMPLE_GROUP
: /* (.....) */
893 /* encoded as <SIMPLE_GROUP><group_length> */
896 break; /* end case SIMPLE_GROUP */
898 case END_GROUP
: /* (.....) */
900 /* encoded as <END_GROUP><groupn> */
903 break; /* end case END_GROUP */
905 case SAVED_GROUP
: /* (.....)$0-9 */
907 /* encoded as <SAVED_GROUP><substringn> */
910 substringn
= (unsigned int)*regexp
;
911 if (substringn
>= NSUBSTRINGS
)
913 substring_startp
[substringn
] = stringp
;
915 break; /* end case SAVED_GROUP */
917 case END_SAVED_GROUP
: /* (.....)$0-9 */
920 * encoded as <END_SAVED_GROUP><substringn>\
921 * <return_arg_number[substringn]>
925 substringn
= (unsigned int)*regexp
;
926 if (substringn
>= NSUBSTRINGS
)
928 substring_endp
[substringn
] = stringp
;
930 return_argn
= (unsigned int)*regexp
;
931 if (return_argn
>= NSUBSTRINGS
)
933 return_arg_number
[substringn
] = return_argn
;
935 break; /* end case END_SAVED_GROUP */
937 case ASCII_CHAR
|ZERO_OR_MORE
: /* char* */
939 /* encoded as <ASCII_CHAR|ZERO_OR_MORE><ascii_char> */
942 repeat_startp
= stringp
;
943 while (*stringp
== *regexp
) {
947 return (test_repeated_ascii_char(repeat_startp
,
950 /* end case ASCII_CHAR|ZERO_OR_MORE */
952 case ASCII_CHAR
|ONE_OR_MORE
: /* char+ */
954 /* encoded as <ASCII_CHAR|ONE_OR_MORE><ascii_char> */
957 if (*stringp
!= *regexp
) {
961 repeat_startp
= stringp
;
962 while (*stringp
== *regexp
) {
966 return (test_repeated_ascii_char(repeat_startp
, stringp
,
969 /* end case ASCII_CHAR|ONE_OR_MORE */
971 case ASCII_CHAR
|COUNT
: /* char{min_count,max_count} */
974 * encoded as <ASCII_CHAR|COUNT><ascii_char>\
975 * <minimum_match_count><maximum_match_count>
979 get_match_counts(&nmust_match
, &nextra_matches_allowed
,
981 while ((*stringp
== *regexp
) && (nmust_match
> 0)) {
985 if (nmust_match
> 0) {
987 } else if (nextra_matches_allowed
== UNLIMITED
) {
988 repeat_startp
= stringp
;
989 while (*stringp
== *regexp
) {
993 return (test_repeated_ascii_char(repeat_startp
, stringp
,
996 repeat_startp
= stringp
;
997 while ((*stringp
== *regexp
) &&
998 (nextra_matches_allowed
> 0)) {
999 nextra_matches_allowed
--;
1003 return (test_repeated_ascii_char(repeat_startp
, stringp
,
1006 /* end case ASCII_CHAR|COUNT */
1008 case MULTIBYTE_CHAR
|ZERO_OR_MORE
: /* char* */
1010 /* encoded as <MULTIBYTE_CHAR|ZERO_OR_MORE><multibyte_char> */
1013 regex_char_size
= get_wchar(®ex_wchar
, regexp
);
1014 repeat_startp
= stringp
;
1015 string_char_size
= get_wchar(&string_wchar
, stringp
);
1016 while ((string_char_size
> 0) &&
1017 (string_wchar
== regex_wchar
)) {
1018 stringp
+= string_char_size
;
1019 string_char_size
= get_wchar(&string_wchar
, stringp
);
1021 regexp
+= regex_char_size
;
1022 return (test_repeated_multibyte_char(repeat_startp
, stringp
,
1025 /* end case MULTIBYTE_CHAR|ZERO_OR_MORE */
1027 case MULTIBYTE_CHAR
|ONE_OR_MORE
: /* char+ */
1029 /* encoded as <MULTIBYTE_CHAR|ONE_OR_MORE><multibyte_char> */
1032 regex_char_size
= get_wchar(®ex_wchar
, regexp
);
1033 string_char_size
= get_wchar(&string_wchar
, stringp
);
1034 if ((string_char_size
<= 0) || (string_wchar
!= regex_wchar
)) {
1037 stringp
+= string_char_size
;
1038 repeat_startp
= stringp
;
1039 string_char_size
= get_wchar(&string_wchar
, stringp
);
1040 while ((string_char_size
> 0) &&
1041 (string_wchar
== regex_wchar
)) {
1042 stringp
+= string_char_size
;
1043 string_char_size
= get_wchar(&string_wchar
, stringp
);
1045 regexp
+= regex_char_size
;
1046 return (test_repeated_multibyte_char(repeat_startp
, stringp
,
1049 /* end case MULTIBYTE_CHAR|ONE_OR_MORE */
1051 case MULTIBYTE_CHAR
|COUNT
: /* char{min_count,max_count} */
1054 * encoded as <MULTIBYTE_CHAR|COUNT><multibyte_char>\
1055 * <minimum_match_count><maximum_match_count>
1059 regex_char_size
= get_wchar(®ex_wchar
, regexp
);
1060 get_match_counts(&nmust_match
, &nextra_matches_allowed
,
1061 regexp
+ regex_char_size
);
1062 string_char_size
= get_wchar(&string_wchar
, stringp
);
1063 while ((string_char_size
> 0) &&
1064 (string_wchar
== regex_wchar
) &&
1065 (nmust_match
> 0)) {
1068 stringp
+= string_char_size
;
1069 string_char_size
= get_wchar(&string_wchar
, stringp
);
1071 if (nmust_match
> 0) {
1073 } else if (nextra_matches_allowed
== UNLIMITED
) {
1074 repeat_startp
= stringp
;
1075 while ((string_char_size
> 0) &&
1076 (string_wchar
== regex_wchar
)) {
1077 stringp
+= string_char_size
;
1078 string_char_size
= get_wchar(&string_wchar
, stringp
);
1080 regexp
+= regex_char_size
+ 2;
1081 return (test_repeated_multibyte_char(repeat_startp
, stringp
,
1084 repeat_startp
= stringp
;
1085 while ((string_char_size
> 0) &&
1086 (string_wchar
== regex_wchar
) &&
1087 (nextra_matches_allowed
> 0)) {
1088 nextra_matches_allowed
--;
1089 stringp
+= string_char_size
;
1090 string_char_size
= get_wchar(&string_wchar
, stringp
);
1092 regexp
+= regex_char_size
+ 2;
1093 return (test_repeated_multibyte_char(repeat_startp
, stringp
,
1096 /* end case MULTIBYTE_CHAR|COUNT */
1098 case ANY_CHAR
|ZERO_OR_MORE
: /* .* */
1100 /* encoded as <ANY_CHAR|ZERO_OR_MORE> */
1102 repeat_startp
= stringp
;
1104 while (*stringp
!= '\0') {
1108 return (test_repeated_ascii_char(repeat_startp
, stringp
,
1111 string_char_size
= get_wchar(&string_wchar
, stringp
);
1112 while (string_char_size
> 0) {
1113 stringp
+= string_char_size
;
1114 string_char_size
= get_wchar(&string_wchar
, stringp
);
1117 return (test_repeated_multibyte_char(repeat_startp
, stringp
,
1120 /* end case <ANY_CHAR|ZERO_OR_MORE> */
1122 case ANY_CHAR
|ONE_OR_MORE
: /* .+ */
1124 /* encoded as <ANY_CHAR|ONE_OR_MORE> */
1127 if (*stringp
== '\0') {
1131 repeat_startp
= stringp
;
1132 while (*stringp
!= '\0') {
1136 return (test_repeated_ascii_char(repeat_startp
, stringp
,
1140 string_char_size
= get_wchar(&string_wchar
, stringp
);
1141 if (string_char_size
<= 0) {
1144 stringp
+= string_char_size
;
1145 repeat_startp
= stringp
;
1146 string_char_size
= get_wchar(&string_wchar
, stringp
);
1147 while (string_char_size
> 0) {
1148 stringp
+= string_char_size
;
1150 get_wchar(&string_wchar
, stringp
);
1153 return (test_repeated_multibyte_char(repeat_startp
,
1157 /* end case <ANY_CHAR|ONE_OR_MORE> */
1159 case ANY_CHAR
|COUNT
: /* .{min_count,max_count} */
1162 * encoded as <ANY_CHAR|COUNT>\
1163 * <minimum_match_count><maximum_match_count>
1166 get_match_counts(&nmust_match
, &nextra_matches_allowed
,
1169 while ((*stringp
!= '\0') && (nmust_match
> 0)) {
1173 if (nmust_match
> 0) {
1175 } else if (nextra_matches_allowed
== UNLIMITED
) {
1176 repeat_startp
= stringp
;
1177 while (*stringp
!= '\0') {
1181 return (test_repeated_ascii_char(repeat_startp
, stringp
,
1184 repeat_startp
= stringp
;
1185 while ((*stringp
!= '\0') &&
1186 (nextra_matches_allowed
> 0)) {
1187 nextra_matches_allowed
--;
1191 return (test_repeated_ascii_char(repeat_startp
, stringp
,
1194 } else { /* multibyte character */
1196 string_char_size
= get_wchar(&string_wchar
, stringp
);
1197 while ((string_char_size
> 0) && (nmust_match
> 0)) {
1199 stringp
+= string_char_size
;
1200 string_char_size
= get_wchar(&string_wchar
, stringp
);
1202 if (nmust_match
> 0) {
1204 } else if (nextra_matches_allowed
== UNLIMITED
) {
1205 repeat_startp
= stringp
;
1206 while (string_char_size
> 0) {
1207 stringp
+= string_char_size
;
1209 get_wchar(&string_wchar
, stringp
);
1212 return (test_repeated_multibyte_char(repeat_startp
,
1215 repeat_startp
= stringp
;
1216 while ((string_char_size
> 0) &&
1217 (nextra_matches_allowed
> 0)) {
1218 nextra_matches_allowed
--;
1219 stringp
+= string_char_size
;
1221 get_wchar(&string_wchar
, stringp
);
1224 return (test_repeated_multibyte_char(repeat_startp
,
1227 } /* end case ANY_CHAR|COUNT */
1229 case IN_ASCII_CHAR_CLASS
|ZERO_OR_MORE
: /* [.....]* */
1230 case NOT_IN_ASCII_CHAR_CLASS
|ZERO_OR_MORE
:
1233 * encoded as <IN_ASCII_CHAR_CLASS|ZERO_OR_MORE>\
1234 * <class_length><class ...>
1235 * or <NOT_IN_ASCII_CHAR_CLASS|ZERO_OR_MORE>\
1236 * <class_length><class ...>
1238 * NOTE: <class_length> includes the <class_length> byte
1241 if ((int)*regexp
== (int)(IN_ASCII_CHAR_CLASS
|ZERO_OR_MORE
)) {
1242 test_condition
= IN_CLASS
;
1244 test_condition
= NOT_IN_CLASS
;
1246 regexp
++; /* point to the <class_length> byte */
1248 repeat_startp
= stringp
;
1249 while ((*stringp
!= '\0') &&
1250 (test_char_against_ascii_class(*stringp
, regexp
,
1251 test_condition
) == CONDITION_TRUE
)) {
1254 regexp
+= (int)*regexp
; /* add the class length to regexp */
1255 return (test_repeated_ascii_char(repeat_startp
, stringp
,
1258 /* end case IN_ASCII_CHAR_CLASS|ZERO_OR_MORE */
1260 case IN_ASCII_CHAR_CLASS
|ONE_OR_MORE
: /* [.....]+ */
1261 case NOT_IN_ASCII_CHAR_CLASS
|ONE_OR_MORE
:
1264 * encoded as <IN_ASCII_CHAR_CLASS|ONE_OR_MORE>\
1265 * <class_length><class ...>
1266 * or <NOT_IN_ASCII_CHAR_CLASS|ONE_OR_MORE>\
1267 * <class_length><class ...>
1269 * NOTE: <class_length> includes the <class_length> byte
1272 if ((int)*regexp
== (int)(IN_ASCII_CHAR_CLASS
|ONE_OR_MORE
)) {
1273 test_condition
= IN_CLASS
;
1275 test_condition
= NOT_IN_CLASS
;
1277 regexp
++; /* point to the <class_length> byte */
1279 if ((*stringp
== '\0') ||
1280 (test_char_against_ascii_class(*stringp
, regexp
,
1281 test_condition
) != CONDITION_TRUE
)) {
1285 repeat_startp
= stringp
;
1286 while ((*stringp
!= '\0') &&
1287 (test_char_against_ascii_class(*stringp
, regexp
,
1288 test_condition
) == CONDITION_TRUE
)) {
1291 regexp
+= (int)*regexp
; /* add the class length to regexp */
1292 return (test_repeated_ascii_char(repeat_startp
, stringp
,
1295 /* end case IN_ASCII_CHAR_CLASS|ONE_OR_MORE */
1297 case IN_ASCII_CHAR_CLASS
| COUNT
: /* [.....]{min_count,max_count} */
1298 case NOT_IN_ASCII_CHAR_CLASS
| COUNT
:
1301 * encoded as <IN_ASCII_CHAR_CLASS|COUNT><class_length>\
1302 * <class ...><minimum_match_count>\
1303 * <maximum_match_count>
1304 * or <NOT_IN_ASCII_CHAR_CLASS|COUNT><class_length>\
1305 * <class ...><minimum_match_count>\
1306 * <maximum_match_count>
1308 * NOTE: <class_length> includes the <class_length> byte,
1309 * but not the <minimum_match_count> or
1310 * <maximum_match_count> bytes
1313 if ((int)*regexp
== (int)(IN_ASCII_CHAR_CLASS
|COUNT
)) {
1314 test_condition
= IN_CLASS
;
1316 test_condition
= NOT_IN_CLASS
;
1318 regexp
++; /* point to the <class_length> byte */
1320 get_match_counts(&nmust_match
, &nextra_matches_allowed
,
1321 regexp
+ (int)*regexp
);
1322 while ((*stringp
!= '\0') &&
1323 (test_char_against_ascii_class(*stringp
, regexp
,
1324 test_condition
) == CONDITION_TRUE
) &&
1325 (nmust_match
> 0)) {
1329 if (nmust_match
> 0) {
1331 } else if (nextra_matches_allowed
== UNLIMITED
) {
1332 repeat_startp
= stringp
;
1333 while ((*stringp
!= '\0') &&
1334 (test_char_against_ascii_class(*stringp
, regexp
,
1335 test_condition
) == CONDITION_TRUE
)) {
1338 regexp
+= (int)*regexp
+ 2;
1339 return (test_repeated_ascii_char(repeat_startp
, stringp
,
1342 repeat_startp
= stringp
;
1343 while ((*stringp
!= '\0') &&
1344 (test_char_against_ascii_class(*stringp
, regexp
,
1345 test_condition
) == CONDITION_TRUE
) &&
1346 (nextra_matches_allowed
> 0)) {
1347 nextra_matches_allowed
--;
1350 regexp
+= (int)*regexp
+ 2;
1351 return (test_repeated_ascii_char(repeat_startp
, stringp
,
1354 /* end case IN_ASCII_CHAR_CLASS|COUNT */
1356 case IN_MULTIBYTE_CHAR_CLASS
|ZERO_OR_MORE
: /* [.....]* */
1357 case NOT_IN_MULTIBYTE_CHAR_CLASS
|ZERO_OR_MORE
:
1360 * encoded as <IN_MULTIBYTE_CHAR_CLASS|ZERO_OR_MORE>\
1361 * <class_length><class ...>
1362 * or <NOT_IN_MULTIBYTE_CHAR_CLASS|ZERO_OR_MORE>\
1363 * <class_length><class ...>
1365 * NOTE: <class_length> includes the <class_length> byte
1369 (int)(IN_MULTIBYTE_CHAR_CLASS
|ZERO_OR_MORE
)) {
1370 test_condition
= IN_CLASS
;
1372 test_condition
= NOT_IN_CLASS
;
1374 regexp
++; /* point to the <class_length> byte */
1376 repeat_startp
= stringp
;
1377 string_char_size
= get_wchar(&string_wchar
, stringp
);
1378 while ((string_char_size
> 0) &&
1379 (test_char_against_multibyte_class(string_wchar
, regexp
,
1380 test_condition
) == CONDITION_TRUE
)) {
1381 stringp
+= string_char_size
;
1382 string_char_size
= get_wchar(&string_wchar
, stringp
);
1384 regexp
+= (int)*regexp
; /* add the class length to regexp */
1385 return (test_repeated_multibyte_char(repeat_startp
, stringp
,
1388 /* end case IN_MULTIBYTE_CHAR_CLASS|ZERO_OR_MORE */
1390 case IN_MULTIBYTE_CHAR_CLASS
|ONE_OR_MORE
: /* [.....]+ */
1391 case NOT_IN_MULTIBYTE_CHAR_CLASS
|ONE_OR_MORE
:
1394 * encoded as <IN_MULTIBYTE_CHAR_CLASS|ONE_OR_MORE>\
1395 * <class_length><class ...>
1396 * or <NOT_IN_MULTIBYTE_CHAR_CLASS|ONE_OR_MORE>\
1397 * <class_length><class ...>
1399 * NOTE: <class_length> includes the <class_length> byte
1403 (int)(IN_MULTIBYTE_CHAR_CLASS
|ONE_OR_MORE
)) {
1404 test_condition
= IN_CLASS
;
1406 test_condition
= NOT_IN_CLASS
;
1408 regexp
++; /* point to the <class_length> byte */
1410 string_char_size
= get_wchar(&string_wchar
, stringp
);
1411 if ((string_char_size
<= 0) ||
1412 (test_char_against_multibyte_class(string_wchar
, regexp
,
1413 test_condition
) != CONDITION_TRUE
)) {
1416 stringp
+= string_char_size
;
1417 repeat_startp
= stringp
;
1418 string_char_size
= get_wchar(&string_wchar
, stringp
);
1419 while ((string_char_size
> 0) &&
1420 (test_char_against_multibyte_class(string_wchar
,
1421 regexp
, test_condition
) == CONDITION_TRUE
)) {
1422 stringp
+= string_char_size
;
1423 string_char_size
= get_wchar(&string_wchar
, stringp
);
1425 regexp
+= (int)*regexp
; /* add the class length to regexp */
1426 return (test_repeated_multibyte_char(repeat_startp
, stringp
,
1429 /* end case IN_MULTIBYTE_CHAR_CLASS|ONE_OR_MORE */
1431 case IN_MULTIBYTE_CHAR_CLASS
|COUNT
: /* [...]{min_count,max_count} */
1432 case NOT_IN_MULTIBYTE_CHAR_CLASS
|COUNT
:
1435 * encoded as <IN_MULTIBYTE_CHAR_CLASS|COUNT>\
1436 * <class_length><class ...><min_count><max_count>
1437 * or <NOT_IN_MULTIBYTE_CHAR_CLASS|COUNT>\
1438 * <class_length><class ...><min_count><max_count>
1440 * NOTE: <class_length> includes the <class_length> byte
1441 * but not the <minimum_match_count> or
1442 * <maximum_match_count> bytes
1445 if ((int)*regexp
== (int)(IN_MULTIBYTE_CHAR_CLASS
|COUNT
)) {
1446 test_condition
= IN_CLASS
;
1448 test_condition
= NOT_IN_CLASS
;
1450 regexp
++; /* point to the <class_length> byte */
1452 get_match_counts(&nmust_match
, &nextra_matches_allowed
,
1453 regexp
+ (int)*regexp
);
1454 string_char_size
= get_wchar(&string_wchar
, stringp
);
1455 while ((string_char_size
> 0) &&
1456 (test_char_against_multibyte_class(string_wchar
, regexp
,
1457 test_condition
) == CONDITION_TRUE
) &&
1458 (nmust_match
> 0)) {
1460 stringp
+= string_char_size
;
1461 string_char_size
= get_wchar(&string_wchar
, stringp
);
1463 if (nmust_match
> 0) {
1465 } else if (nextra_matches_allowed
== UNLIMITED
) {
1466 repeat_startp
= stringp
;
1467 while ((string_char_size
> 0) &&
1468 (test_char_against_multibyte_class(string_wchar
,
1469 regexp
, test_condition
) == CONDITION_TRUE
)) {
1470 stringp
+= string_char_size
;
1471 string_char_size
= get_wchar(&string_wchar
, stringp
);
1473 regexp
+= (int)*regexp
+ 2;
1474 return (test_repeated_multibyte_char(repeat_startp
, stringp
,
1477 repeat_startp
= stringp
;
1478 while ((string_char_size
> 0) &&
1479 (test_char_against_multibyte_class(string_wchar
,
1480 regexp
, test_condition
) == CONDITION_TRUE
) &&
1481 (nextra_matches_allowed
> 0)) {
1482 nextra_matches_allowed
--;
1483 stringp
+= string_char_size
;
1484 string_char_size
= get_wchar(&string_wchar
, stringp
);
1486 regexp
+= (int)*regexp
+ 2;
1487 return (test_repeated_multibyte_char(repeat_startp
, stringp
,
1490 /* end case IN_MULTIBYTE_CHAR_CLASS|COUNT */
1492 case IN_OLD_ASCII_CHAR_CLASS
|ZERO_OR_MORE
: /* [.....]* */
1493 case NOT_IN_OLD_ASCII_CHAR_CLASS
|ZERO_OR_MORE
:
1496 * encoded as <IN_OLD_ASCII_CHAR_CLASS|ZERO_OR_MORE>\
1497 * <class_length><class ...>
1498 * or <NOT_IN_OLD_ASCII_CHAR_CLASS|ZERO_OR_MORE>\
1499 * <class_length><class ...>
1501 * NOTE: <class_length> includes the <class_length> byte
1505 (int)(IN_OLD_ASCII_CHAR_CLASS
|ZERO_OR_MORE
)) {
1506 test_condition
= IN_CLASS
;
1508 test_condition
= NOT_IN_CLASS
;
1510 regexp
++; /* point to the <class_length> byte */
1512 repeat_startp
= stringp
;
1513 while ((*stringp
!= '\0') &&
1514 (test_char_against_old_ascii_class(*stringp
, regexp
,
1515 test_condition
) == CONDITION_TRUE
)) {
1518 regexp
+= (int)*regexp
; /* add the class length to regexp */
1519 return (test_repeated_ascii_char(repeat_startp
, stringp
,
1522 /* end case IN_OLD_ASCII_CHAR_CLASS|ZERO_OR_MORE */
1524 case IN_OLD_ASCII_CHAR_CLASS
|ONE_OR_MORE
: /* [.....]+ */
1525 case NOT_IN_OLD_ASCII_CHAR_CLASS
|ONE_OR_MORE
:
1528 * encoded as <IN_OLD_ASCII_CHAR_CLASS|ONE_OR_MORE>\
1529 * <class_length><class ...>
1530 * or <NOT_IN_OLD_ASCII_CHAR_CLASS|ONE_OR_MORE>\
1531 * <class_length><class ...>
1533 * NOTE: <class_length> includes the <class_length> byte
1537 (int)(IN_OLD_ASCII_CHAR_CLASS
|ONE_OR_MORE
)) {
1538 test_condition
= IN_CLASS
;
1540 test_condition
= NOT_IN_CLASS
;
1542 regexp
++; /* point to the <class_length> byte */
1544 if ((*stringp
== '\0') ||
1545 (test_char_against_old_ascii_class(*stringp
, regexp
,
1546 test_condition
) != CONDITION_TRUE
)) {
1550 repeat_startp
= stringp
;
1551 while ((*stringp
!= '\0') &&
1552 (test_char_against_old_ascii_class(*stringp
, regexp
,
1553 test_condition
) == CONDITION_TRUE
)) {
1556 regexp
+= (int)*regexp
; /* add the class length to regexp */
1557 return (test_repeated_ascii_char(repeat_startp
, stringp
,
1560 /* end case IN_OLD_ASCII_CHAR_CLASS | ONE_OR_MORE */
1562 case IN_OLD_ASCII_CHAR_CLASS
|COUNT
: /* [...]{min_count,max_count} */
1563 case NOT_IN_OLD_ASCII_CHAR_CLASS
|COUNT
:
1566 * encoded as <IN_OLD_ASCII_CHAR_CLASS|COUNT><class_length>\
1567 * <class ...><minimum_match_count>\
1568 * <maximum_match_count>
1569 * or <NOT_IN_OLD_ASCII_CHAR_CLASS|COUNT>\
1570 * <class_length><class ...><minimum_match_count>\
1571 * <maximum_match_count>
1573 * NOTE: <class_length> includes the <class_length> byte
1574 * but not the <minimum_match_count> or
1575 * <maximum_match_count> bytes
1578 if ((int)*regexp
== (int)(IN_OLD_ASCII_CHAR_CLASS
|COUNT
)) {
1579 test_condition
= IN_CLASS
;
1581 test_condition
= NOT_IN_CLASS
;
1583 regexp
++; /* point to the <class_length> byte */
1585 get_match_counts(&nmust_match
, &nextra_matches_allowed
,
1586 regexp
+ (int)*regexp
);
1587 while ((*stringp
!= '\0') &&
1588 (test_char_against_old_ascii_class(*stringp
, regexp
,
1589 test_condition
) == CONDITION_TRUE
) &&
1590 (nmust_match
> 0)) {
1594 if (nmust_match
> 0) {
1596 } else if (nextra_matches_allowed
== UNLIMITED
) {
1597 repeat_startp
= stringp
;
1598 while ((*stringp
!= '\0') &&
1599 (test_char_against_old_ascii_class(*stringp
, regexp
,
1600 test_condition
) == CONDITION_TRUE
)) {
1603 regexp
+= (int)*regexp
+ 2;
1604 return (test_repeated_ascii_char(repeat_startp
, stringp
,
1607 repeat_startp
= stringp
;
1608 while ((*stringp
!= '\0') &&
1609 (test_char_against_old_ascii_class(*stringp
, regexp
,
1610 test_condition
) == CONDITION_TRUE
) &&
1611 (nextra_matches_allowed
> 0)) {
1612 nextra_matches_allowed
--;
1615 regexp
+= (int)*regexp
+ 2;
1616 return (test_repeated_ascii_char(repeat_startp
, stringp
,
1619 /* end case IN_OLD_ASCII_CHAR_CLASS|COUNT */
1621 case ZERO_OR_MORE_GROUP
: /* (.....)* */
1622 case ZERO_OR_MORE_GROUP
|ADD_256_TO_GROUP_LENGTH
:
1623 case ZERO_OR_MORE_GROUP
|ADD_512_TO_GROUP_LENGTH
:
1624 case ZERO_OR_MORE_GROUP
|ADD_768_TO_GROUP_LENGTH
:
1627 * encoded as <ZERO_OR_MORE_GROUP|ADDED_LENGTH_BITS>\
1628 * <group_length><compiled_regex...>\
1629 * <END_GROUP|ZERO_OR_MORE><groupn>
1633 * group_length + (256 * ADDED_LENGTH_BITS) ==
1634 * length_of(<compiled_regex...><END_GROUP|ZERO_OR_MORE>\
1640 (((unsigned int)*regexp
& ADDED_LENGTH_BITS
) <<
1643 group_length
+= (unsigned int)*regexp
;
1645 repeat_startp
= stringp
;
1646 test_stringp
= test_string(stringp
, regexp
);
1647 while (test_stringp
!= NULL
) {
1648 if (push_stringp(stringp
) == NULL
)
1650 stringp
= test_stringp
;
1651 test_stringp
= test_string(stringp
, regexp
);
1653 regexp
+= group_length
;
1654 return (test_repeated_group(repeat_startp
, stringp
, regexp
));
1656 /* end case ZERO_OR_MORE_GROUP */
1658 case END_GROUP
|ZERO_OR_MORE
: /* (.....)* */
1660 /* encoded as <END_GROUP|ZERO_OR_MORE> */
1662 /* return from recursive call to test_string() */
1664 return ((char *)stringp
);
1666 /* end case END_GROUP|ZERO_OR_MORE */
1668 case ONE_OR_MORE_GROUP
: /* (.....)+ */
1669 case ONE_OR_MORE_GROUP
|ADD_256_TO_GROUP_LENGTH
:
1670 case ONE_OR_MORE_GROUP
|ADD_512_TO_GROUP_LENGTH
:
1671 case ONE_OR_MORE_GROUP
|ADD_768_TO_GROUP_LENGTH
:
1674 * encoded as <ONE_OR_MORE_GROUP|ADDED_LENGTH_BITS>\
1675 * <group_length><compiled_regex...>\
1676 * <END_GROUP|ONE_OR_MORE><groupn>
1680 * group_length + (256 * ADDED_LENGTH_BITS) ==
1681 * length_of(<compiled_regex...><END_GROUP|ONE_OR_MORE>\
1686 (((unsigned int)*regexp
& ADDED_LENGTH_BITS
) <<
1689 group_length
+= (unsigned int)*regexp
;
1691 stringp
= test_string(stringp
, regexp
);
1692 if (stringp
== NULL
)
1694 repeat_startp
= stringp
;
1695 test_stringp
= test_string(stringp
, regexp
);
1696 while (test_stringp
!= NULL
) {
1697 if (push_stringp(stringp
) == NULL
)
1699 stringp
= test_stringp
;
1700 test_stringp
= test_string(stringp
, regexp
);
1702 regexp
+= group_length
;
1703 return (test_repeated_group(repeat_startp
, stringp
, regexp
));
1705 /* end case ONE_OR_MORE_GROUP */
1707 case END_GROUP
|ONE_OR_MORE
: /* (.....)+ */
1709 /* encoded as <END_GROUP|ONE_OR_MORE><groupn> */
1711 /* return from recursive call to test_string() */
1713 return ((char *)stringp
);
1715 /* end case END_GROUP|ONE_OR_MORE */
1717 case COUNTED_GROUP
: /* (.....){min_count,max_count} */
1718 case COUNTED_GROUP
|ADD_256_TO_GROUP_LENGTH
:
1719 case COUNTED_GROUP
|ADD_512_TO_GROUP_LENGTH
:
1720 case COUNTED_GROUP
|ADD_768_TO_GROUP_LENGTH
:
1723 * encoded as <COUNTED_GROUP|ADDED_LENGTH_BITS><group_length>\
1724 * <compiled_regex...>\<END_GROUP|COUNT><groupn>\
1725 * <minimum_match_count><maximum_match_count>
1729 * group_length + (256 * ADDED_LENGTH_BITS) ==
1730 * length_of(<compiled_regex...><END_GROUP|COUNT><groupn>)
1732 * but does not include the <minimum_match_count> or
1733 * <maximum_match_count> bytes
1737 (((unsigned int)*regexp
& ADDED_LENGTH_BITS
) <<
1740 group_length
+= (unsigned int)*regexp
;
1742 get_match_counts(&nmust_match
, &nextra_matches_allowed
,
1743 regexp
+ group_length
);
1744 test_stringp
= test_string(stringp
, regexp
);
1745 while ((test_stringp
!= NULL
) && (nmust_match
> 0)) {
1746 stringp
= test_stringp
;
1748 test_stringp
= test_string(stringp
, regexp
);
1750 if (nmust_match
> 0) {
1752 } else if (nextra_matches_allowed
== UNLIMITED
) {
1753 repeat_startp
= stringp
;
1754 while (test_stringp
!= NULL
) {
1755 if (push_stringp(stringp
) == NULL
)
1757 stringp
= test_stringp
;
1758 test_stringp
= test_string(stringp
, regexp
);
1760 regexp
+= group_length
+ 2;
1761 return (test_repeated_group(repeat_startp
, stringp
,
1764 repeat_startp
= stringp
;
1765 while ((test_stringp
!= NULL
) &&
1766 (nextra_matches_allowed
> 0)) {
1767 nextra_matches_allowed
--;
1768 if (push_stringp(stringp
) == NULL
)
1770 stringp
= test_stringp
;
1771 test_stringp
= test_string(stringp
, regexp
);
1773 regexp
+= group_length
+ 2;
1774 return (test_repeated_group(repeat_startp
, stringp
,
1777 /* end case COUNTED_GROUP */
1779 case END_GROUP
|COUNT
: /* (.....){min_count,max_count} */
1781 /* encoded as <END_GROUP|COUNT> */
1783 /* return from recursive call to test_string() */
1787 /* end case END_GROUP|COUNT */
1789 case END_OF_STRING_MARK
:
1791 /* encoded as <END_OF_STRING_MARK><END_REGEX> */
1793 if (*stringp
== '\0') {
1798 break; /* end case END_OF_STRING_MARK */
1800 case END_REGEX
: /* end of the compiled regular expression */
1802 /* encoded as <END_REGEX> */
1806 /* end case END_REGEX */
1812 } /* end switch (*regexp) */
1814 } /* end for (;;) */
1816 } /* test_string() */