/*************************************************
*      Perl-Compatible Regular Expressions       *
*************************************************/

/* PCRE is a library of functions to support regular expressions whose syntax
and semantics are as close as possible to those of the Perl 5 language.

                       Written by Philip Hazel
           Copyright (c) 1997-2012 University of Cambridge

-----------------------------------------------------------------------------
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are met:

    * Redistributions of source code must retain the above copyright notice,
      this list of conditions and the following disclaimer.

    * Redistributions in binary form must reproduce the above copyright
      notice, this list of conditions and the following disclaimer in the
      documentation and/or other materials provided with the distribution.

    * Neither the name of the University of Cambridge nor the names of its
      contributors may be used to endorse or promote products derived from
      this software without specific prior written permission.

THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
POSSIBILITY OF SUCH DAMAGE.
-----------------------------------------------------------------------------
*/
/* This module contains some convenience functions for extracting substrings
from the subject string after a regex match has succeeded. The original idea
for these functions came from Scott Wimer. */
#include "pcre_internal.h"
51 /*************************************************
52 * Find number for named string *
53 *************************************************/
55 /* This function is used by the get_first_set() function below, as well
56 as being generally available. It assumes that names are unique.
59 code the compiled regex
60 stringname the name whose number is required
62 Returns: the number of the named parentheses, or a negative number
63 (PCRE_ERROR_NOSUBSTRING) if not found
67 PCRE_EXP_DEFN
int PCRE_CALL_CONVENTION
68 pcre_get_stringnumber(const pcre
*code
, const char *stringname
)
70 PCRE_EXP_DEFN
int PCRE_CALL_CONVENTION
71 pcre16_get_stringnumber(const pcre16
*code
, PCRE_SPTR16 stringname
)
77 pcre_uchar
*nametable
;
80 if ((rc
= pcre_fullinfo(code
, NULL
, PCRE_INFO_NAMECOUNT
, &top
)) != 0)
82 if (top
<= 0) return PCRE_ERROR_NOSUBSTRING
;
84 if ((rc
= pcre_fullinfo(code
, NULL
, PCRE_INFO_NAMEENTRYSIZE
, &entrysize
)) != 0)
86 if ((rc
= pcre_fullinfo(code
, NULL
, PCRE_INFO_NAMETABLE
, &nametable
)) != 0)
90 if ((rc
= pcre16_fullinfo(code
, NULL
, PCRE_INFO_NAMECOUNT
, &top
)) != 0)
92 if (top
<= 0) return PCRE_ERROR_NOSUBSTRING
;
94 if ((rc
= pcre16_fullinfo(code
, NULL
, PCRE_INFO_NAMEENTRYSIZE
, &entrysize
)) != 0)
96 if ((rc
= pcre16_fullinfo(code
, NULL
, PCRE_INFO_NAMETABLE
, &nametable
)) != 0)
103 int mid
= (top
+ bot
) / 2;
104 pcre_uchar
*entry
= nametable
+ entrysize
*mid
;
105 int c
= STRCMP_UC_UC((pcre_uchar
*)stringname
,
106 (pcre_uchar
*)(entry
+ IMM2_SIZE
));
107 if (c
== 0) return GET2(entry
, 0);
108 if (c
> 0) bot
= mid
+ 1; else top
= mid
;
111 return PCRE_ERROR_NOSUBSTRING
;
116 /*************************************************
117 * Find (multiple) entries for named string *
118 *************************************************/
120 /* This is used by the get_first_set() function below, as well as being
121 generally available. It is used when duplicated names are permitted.
124 code the compiled regex
125 stringname the name whose entries required
126 firstptr where to put the pointer to the first entry
127 lastptr where to put the pointer to the last entry
129 Returns: the length of each entry, or a negative number
130 (PCRE_ERROR_NOSUBSTRING) if not found
134 PCRE_EXP_DEFN
int PCRE_CALL_CONVENTION
135 pcre_get_stringtable_entries(const pcre
*code
, const char *stringname
,
136 char **firstptr
, char **lastptr
)
138 PCRE_EXP_DEFN
int PCRE_CALL_CONVENTION
139 pcre16_get_stringtable_entries(const pcre16
*code
, PCRE_SPTR16 stringname
,
140 PCRE_UCHAR16
**firstptr
, PCRE_UCHAR16
**lastptr
)
146 pcre_uchar
*nametable
, *lastentry
;
149 if ((rc
= pcre_fullinfo(code
, NULL
, PCRE_INFO_NAMECOUNT
, &top
)) != 0)
151 if (top
<= 0) return PCRE_ERROR_NOSUBSTRING
;
153 if ((rc
= pcre_fullinfo(code
, NULL
, PCRE_INFO_NAMEENTRYSIZE
, &entrysize
)) != 0)
155 if ((rc
= pcre_fullinfo(code
, NULL
, PCRE_INFO_NAMETABLE
, &nametable
)) != 0)
158 #ifdef COMPILE_PCRE16
159 if ((rc
= pcre16_fullinfo(code
, NULL
, PCRE_INFO_NAMECOUNT
, &top
)) != 0)
161 if (top
<= 0) return PCRE_ERROR_NOSUBSTRING
;
163 if ((rc
= pcre16_fullinfo(code
, NULL
, PCRE_INFO_NAMEENTRYSIZE
, &entrysize
)) != 0)
165 if ((rc
= pcre16_fullinfo(code
, NULL
, PCRE_INFO_NAMETABLE
, &nametable
)) != 0)
169 lastentry
= nametable
+ entrysize
* (top
- 1);
173 int mid
= (top
+ bot
) / 2;
174 pcre_uchar
*entry
= nametable
+ entrysize
*mid
;
175 int c
= STRCMP_UC_UC((pcre_uchar
*)stringname
,
176 (pcre_uchar
*)(entry
+ IMM2_SIZE
));
179 pcre_uchar
*first
= entry
;
180 pcre_uchar
*last
= entry
;
181 while (first
> nametable
)
183 if (STRCMP_UC_UC((pcre_uchar
*)stringname
,
184 (pcre_uchar
*)(first
- entrysize
+ IMM2_SIZE
)) != 0) break;
187 while (last
< lastentry
)
189 if (STRCMP_UC_UC((pcre_uchar
*)stringname
,
190 (pcre_uchar
*)(last
+ entrysize
+ IMM2_SIZE
)) != 0) break;
194 *firstptr
= (char *)first
;
195 *lastptr
= (char *)last
;
197 *firstptr
= (PCRE_UCHAR16
*)first
;
198 *lastptr
= (PCRE_UCHAR16
*)last
;
202 if (c
> 0) bot
= mid
+ 1; else top
= mid
;
205 return PCRE_ERROR_NOSUBSTRING
;
210 /*************************************************
211 * Find first set of multiple named strings *
212 *************************************************/
214 /* This function allows for duplicate names in the table of named substrings.
215 It returns the number of the first one that was set in a pattern match.
218 code the compiled regex
219 stringname the name of the capturing substring
220 ovector the vector of matched substrings
222 Returns: the number of the first that is set,
223 or the number of the last one if none are set,
224 or a negative number on error
229 get_first_set(const pcre
*code
, const char *stringname
, int *ovector
)
232 get_first_set(const pcre16
*code
, PCRE_SPTR16 stringname
, int *ovector
)
235 const REAL_PCRE
*re
= (const REAL_PCRE
*)code
;
241 PCRE_UCHAR16
*first
, *last
;
245 if ((re
->options
& PCRE_DUPNAMES
) == 0 && (re
->flags
& PCRE_JCHANGED
) == 0)
246 return pcre_get_stringnumber(code
, stringname
);
247 entrysize
= pcre_get_stringtable_entries(code
, stringname
, &first
, &last
);
249 if ((re
->options
& PCRE_DUPNAMES
) == 0 && (re
->flags
& PCRE_JCHANGED
) == 0)
250 return pcre16_get_stringnumber(code
, stringname
);
251 entrysize
= pcre16_get_stringtable_entries(code
, stringname
, &first
, &last
);
253 if (entrysize
<= 0) return entrysize
;
254 for (entry
= (pcre_uchar
*)first
; entry
<= (pcre_uchar
*)last
; entry
+= entrysize
)
256 int n
= GET2(entry
, 0);
257 if (ovector
[n
*2] >= 0) return n
;
259 return GET2(entry
, 0);
265 /*************************************************
266 * Copy captured string to given buffer *
267 *************************************************/
269 /* This function copies a single captured substring into a given buffer.
270 Note that we use memcpy() rather than strncpy() in case there are binary zeros
274 subject the subject string that was matched
275 ovector pointer to the offsets table
276 stringcount the number of substrings that were captured
277 (i.e. the yield of the pcre_exec call, unless
278 that was zero, in which case it should be 1/3
279 of the offset table size)
280 stringnumber the number of the required substring
281 buffer where to put the substring
282 size the size of the buffer
284 Returns: if successful:
285 the length of the copied string, not including the zero
286 that is put on the end; can be zero
288 PCRE_ERROR_NOMEMORY (-6) buffer too small
289 PCRE_ERROR_NOSUBSTRING (-7) no such captured substring
293 PCRE_EXP_DEFN
int PCRE_CALL_CONVENTION
294 pcre_copy_substring(const char *subject
, int *ovector
, int stringcount
,
295 int stringnumber
, char *buffer
, int size
)
297 PCRE_EXP_DEFN
int PCRE_CALL_CONVENTION
298 pcre16_copy_substring(PCRE_SPTR16 subject
, int *ovector
, int stringcount
,
299 int stringnumber
, PCRE_UCHAR16
*buffer
, int size
)
303 if (stringnumber
< 0 || stringnumber
>= stringcount
)
304 return PCRE_ERROR_NOSUBSTRING
;
306 yield
= ovector
[stringnumber
+1] - ovector
[stringnumber
];
307 if (size
< yield
+ 1) return PCRE_ERROR_NOMEMORY
;
308 memcpy(buffer
, subject
+ ovector
[stringnumber
], IN_UCHARS(yield
));
315 /*************************************************
316 * Copy named captured string to given buffer *
317 *************************************************/
319 /* This function copies a single captured substring into a given buffer,
320 identifying it by name. If the regex permits duplicate names, the first
321 substring that is set is chosen.
324 code the compiled regex
325 subject the subject string that was matched
326 ovector pointer to the offsets table
327 stringcount the number of substrings that were captured
328 (i.e. the yield of the pcre_exec call, unless
329 that was zero, in which case it should be 1/3
330 of the offset table size)
331 stringname the name of the required substring
332 buffer where to put the substring
333 size the size of the buffer
335 Returns: if successful:
336 the length of the copied string, not including the zero
337 that is put on the end; can be zero
339 PCRE_ERROR_NOMEMORY (-6) buffer too small
340 PCRE_ERROR_NOSUBSTRING (-7) no such captured substring
344 PCRE_EXP_DEFN
int PCRE_CALL_CONVENTION
345 pcre_copy_named_substring(const pcre
*code
, const char *subject
,
346 int *ovector
, int stringcount
, const char *stringname
,
347 char *buffer
, int size
)
349 PCRE_EXP_DEFN
int PCRE_CALL_CONVENTION
350 pcre16_copy_named_substring(const pcre16
*code
, PCRE_SPTR16 subject
,
351 int *ovector
, int stringcount
, PCRE_SPTR16 stringname
,
352 PCRE_UCHAR16
*buffer
, int size
)
355 int n
= get_first_set(code
, stringname
, ovector
);
356 if (n
<= 0) return n
;
358 return pcre_copy_substring(subject
, ovector
, stringcount
, n
, buffer
, size
);
360 return pcre16_copy_substring(subject
, ovector
, stringcount
, n
, buffer
, size
);
366 /*************************************************
367 * Copy all captured strings to new store *
368 *************************************************/
370 /* This function gets one chunk of store and builds a list of pointers and all
371 of the captured substrings in it. A NULL pointer is put on the end of the list.
374 subject the subject string that was matched
375 ovector pointer to the offsets table
376 stringcount the number of substrings that were captured
377 (i.e. the yield of the pcre_exec call, unless
378 that was zero, in which case it should be 1/3
379 of the offset table size)
380 listptr set to point to the list of pointers
382 Returns: if successful: 0
384 PCRE_ERROR_NOMEMORY (-6) failed to get store
388 PCRE_EXP_DEFN
int PCRE_CALL_CONVENTION
389 pcre_get_substring_list(const char *subject
, int *ovector
, int stringcount
,
390 const char ***listptr
)
392 PCRE_EXP_DEFN
int PCRE_CALL_CONVENTION
393 pcre16_get_substring_list(PCRE_SPTR16 subject
, int *ovector
, int stringcount
,
394 PCRE_SPTR16
**listptr
)
398 int size
= sizeof(pcre_uchar
*);
399 int double_count
= stringcount
* 2;
400 pcre_uchar
**stringlist
;
403 for (i
= 0; i
< double_count
; i
+= 2)
404 size
+= sizeof(pcre_uchar
*) + IN_UCHARS(ovector
[i
+1] - ovector
[i
] + 1);
406 stringlist
= (pcre_uchar
**)(PUBL(malloc
))(size
);
407 if (stringlist
== NULL
) return PCRE_ERROR_NOMEMORY
;
410 *listptr
= (const char **)stringlist
;
412 *listptr
= (PCRE_SPTR16
*)stringlist
;
414 p
= (pcre_uchar
*)(stringlist
+ stringcount
+ 1);
416 for (i
= 0; i
< double_count
; i
+= 2)
418 int len
= ovector
[i
+1] - ovector
[i
];
419 memcpy(p
, subject
+ ovector
[i
], IN_UCHARS(len
));
431 /*************************************************
432 * Free store obtained by get_substring_list *
433 *************************************************/
435 /* This function exists for the benefit of people calling PCRE from non-C
436 programs that can call its functions, but not free() or (PUBL(free))()
439 Argument: the result of a previous pcre_get_substring_list()
444 PCRE_EXP_DEFN
void PCRE_CALL_CONVENTION
445 pcre_free_substring_list(const char **pointer
)
447 PCRE_EXP_DEFN
void PCRE_CALL_CONVENTION
448 pcre16_free_substring_list(PCRE_SPTR16
*pointer
)
451 (PUBL(free
))((void *)pointer
);
456 /*************************************************
457 * Copy captured string to new store *
458 *************************************************/
460 /* This function copies a single captured substring into a piece of new
464 subject the subject string that was matched
465 ovector pointer to the offsets table
466 stringcount the number of substrings that were captured
467 (i.e. the yield of the pcre_exec call, unless
468 that was zero, in which case it should be 1/3
469 of the offset table size)
470 stringnumber the number of the required substring
471 stringptr where to put a pointer to the substring
473 Returns: if successful:
474 the length of the string, not including the zero that
475 is put on the end; can be zero
477 PCRE_ERROR_NOMEMORY (-6) failed to get store
478 PCRE_ERROR_NOSUBSTRING (-7) substring not present
482 PCRE_EXP_DEFN
int PCRE_CALL_CONVENTION
483 pcre_get_substring(const char *subject
, int *ovector
, int stringcount
,
484 int stringnumber
, const char **stringptr
)
486 PCRE_EXP_DEFN
int PCRE_CALL_CONVENTION
487 pcre16_get_substring(PCRE_SPTR16 subject
, int *ovector
, int stringcount
,
488 int stringnumber
, PCRE_SPTR16
*stringptr
)
492 pcre_uchar
*substring
;
493 if (stringnumber
< 0 || stringnumber
>= stringcount
)
494 return PCRE_ERROR_NOSUBSTRING
;
496 yield
= ovector
[stringnumber
+1] - ovector
[stringnumber
];
497 substring
= (pcre_uchar
*)(PUBL(malloc
))(IN_UCHARS(yield
+ 1));
498 if (substring
== NULL
) return PCRE_ERROR_NOMEMORY
;
499 memcpy(substring
, subject
+ ovector
[stringnumber
], IN_UCHARS(yield
));
500 substring
[yield
] = 0;
502 *stringptr
= (const char *)substring
;
504 *stringptr
= (PCRE_SPTR16
)substring
;
511 /*************************************************
512 * Copy named captured string to new store *
513 *************************************************/
515 /* This function copies a single captured substring, identified by name, into
516 new store. If the regex permits duplicate names, the first substring that is
520 code the compiled regex
521 subject the subject string that was matched
522 ovector pointer to the offsets table
523 stringcount the number of substrings that were captured
524 (i.e. the yield of the pcre_exec call, unless
525 that was zero, in which case it should be 1/3
526 of the offset table size)
527 stringname the name of the required substring
528 stringptr where to put the pointer
530 Returns: if successful:
531 the length of the copied string, not including the zero
532 that is put on the end; can be zero
534 PCRE_ERROR_NOMEMORY (-6) couldn't get memory
535 PCRE_ERROR_NOSUBSTRING (-7) no such captured substring
539 PCRE_EXP_DEFN
int PCRE_CALL_CONVENTION
540 pcre_get_named_substring(const pcre
*code
, const char *subject
,
541 int *ovector
, int stringcount
, const char *stringname
,
542 const char **stringptr
)
544 PCRE_EXP_DEFN
int PCRE_CALL_CONVENTION
545 pcre16_get_named_substring(const pcre16
*code
, PCRE_SPTR16 subject
,
546 int *ovector
, int stringcount
, PCRE_SPTR16 stringname
,
547 PCRE_SPTR16
*stringptr
)
550 int n
= get_first_set(code
, stringname
, ovector
);
551 if (n
<= 0) return n
;
553 return pcre_get_substring(subject
, ovector
, stringcount
, n
, stringptr
);
555 return pcre16_get_substring(subject
, ovector
, stringcount
, n
, stringptr
);
562 /*************************************************
563 * Free store obtained by get_substring *
564 *************************************************/
566 /* This function exists for the benefit of people calling PCRE from non-C
567 programs that can call its functions, but not free() or (PUBL(free))()
570 Argument: the result of a previous pcre_get_substring()
575 PCRE_EXP_DEFN
void PCRE_CALL_CONVENTION
576 pcre_free_substring(const char *pointer
)
578 PCRE_EXP_DEFN
void PCRE_CALL_CONVENTION
579 pcre16_free_substring(PCRE_SPTR16 pointer
)
582 (PUBL(free
))((void *)pointer
);
/* End of pcre_get.c */