1 /*************************************************
2 * Perl-Compatible Regular Expressions *
3 *************************************************/
5 /* PCRE is a library of functions to support regular expressions whose syntax
6 and semantics are as close as possible to those of the Perl 5 language.
8 Written by Philip Hazel
9 Copyright (c) 1997-2012 University of Cambridge
11 -----------------------------------------------------------------------------
12 Redistribution and use in source and binary forms, with or without
13 modification, are permitted provided that the following conditions are met:
15 * Redistributions of source code must retain the above copyright notice,
16 this list of conditions and the following disclaimer.
18 * Redistributions in binary form must reproduce the above copyright
19 notice, this list of conditions and the following disclaimer in the
20 documentation and/or other materials provided with the distribution.
22 * Neither the name of the University of Cambridge nor the names of its
23 contributors may be used to endorse or promote products derived from
24 this software without specific prior written permission.
26 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
27 AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
28 IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
29 ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
30 LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
31 CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
32 SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
33 INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
34 CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
35 ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
36 POSSIBILITY OF SUCH DAMAGE.
37 -----------------------------------------------------------------------------
41 /* This module contains some convenience functions for extracting substrings
42 from the subject string after a regex match has succeeded. The original idea
43 for these functions came from Scott Wimer. */
50 #include "pcre_internal.h"
53 /*************************************************
54 * Find number for named string *
55 *************************************************/
57 /* This function is used by the get_first_set() function below, as well
58 as being generally available. It assumes that names are unique.
61 code the compiled regex
62 stringname the name whose number is required
64 Returns: the number of the named parentheses, or a negative number
65 (PCRE_ERROR_NOSUBSTRING) if not found
69 PCRE_EXP_DEFN
int PCRE_CALL_CONVENTION
70 pcre_get_stringnumber(const pcre
*code
, const char *stringname
)
72 PCRE_EXP_DEFN
int PCRE_CALL_CONVENTION
73 pcre16_get_stringnumber(const pcre16
*code
, PCRE_SPTR16 stringname
)
79 pcre_uchar
*nametable
;
82 if ((rc
= pcre_fullinfo(code
, NULL
, PCRE_INFO_NAMECOUNT
, &top
)) != 0)
84 if (top
<= 0) return PCRE_ERROR_NOSUBSTRING
;
86 if ((rc
= pcre_fullinfo(code
, NULL
, PCRE_INFO_NAMEENTRYSIZE
, &entrysize
)) != 0)
88 if ((rc
= pcre_fullinfo(code
, NULL
, PCRE_INFO_NAMETABLE
, &nametable
)) != 0)
92 if ((rc
= pcre16_fullinfo(code
, NULL
, PCRE_INFO_NAMECOUNT
, &top
)) != 0)
94 if (top
<= 0) return PCRE_ERROR_NOSUBSTRING
;
96 if ((rc
= pcre16_fullinfo(code
, NULL
, PCRE_INFO_NAMEENTRYSIZE
, &entrysize
)) != 0)
98 if ((rc
= pcre16_fullinfo(code
, NULL
, PCRE_INFO_NAMETABLE
, &nametable
)) != 0)
105 int mid
= (top
+ bot
) / 2;
106 pcre_uchar
*entry
= nametable
+ entrysize
*mid
;
107 int c
= STRCMP_UC_UC((pcre_uchar
*)stringname
,
108 (pcre_uchar
*)(entry
+ IMM2_SIZE
));
109 if (c
== 0) return GET2(entry
, 0);
110 if (c
> 0) bot
= mid
+ 1; else top
= mid
;
113 return PCRE_ERROR_NOSUBSTRING
;
118 /*************************************************
119 * Find (multiple) entries for named string *
120 *************************************************/
122 /* This is used by the get_first_set() function below, as well as being
123 generally available. It is used when duplicated names are permitted.
126 code the compiled regex
127 stringname the name whose entries required
128 firstptr where to put the pointer to the first entry
129 lastptr where to put the pointer to the last entry
131 Returns: the length of each entry, or a negative number
132 (PCRE_ERROR_NOSUBSTRING) if not found
136 PCRE_EXP_DEFN
int PCRE_CALL_CONVENTION
137 pcre_get_stringtable_entries(const pcre
*code
, const char *stringname
,
138 char **firstptr
, char **lastptr
)
140 PCRE_EXP_DEFN
int PCRE_CALL_CONVENTION
141 pcre16_get_stringtable_entries(const pcre16
*code
, PCRE_SPTR16 stringname
,
142 PCRE_UCHAR16
**firstptr
, PCRE_UCHAR16
**lastptr
)
148 pcre_uchar
*nametable
, *lastentry
;
151 if ((rc
= pcre_fullinfo(code
, NULL
, PCRE_INFO_NAMECOUNT
, &top
)) != 0)
153 if (top
<= 0) return PCRE_ERROR_NOSUBSTRING
;
155 if ((rc
= pcre_fullinfo(code
, NULL
, PCRE_INFO_NAMEENTRYSIZE
, &entrysize
)) != 0)
157 if ((rc
= pcre_fullinfo(code
, NULL
, PCRE_INFO_NAMETABLE
, &nametable
)) != 0)
160 #ifdef COMPILE_PCRE16
161 if ((rc
= pcre16_fullinfo(code
, NULL
, PCRE_INFO_NAMECOUNT
, &top
)) != 0)
163 if (top
<= 0) return PCRE_ERROR_NOSUBSTRING
;
165 if ((rc
= pcre16_fullinfo(code
, NULL
, PCRE_INFO_NAMEENTRYSIZE
, &entrysize
)) != 0)
167 if ((rc
= pcre16_fullinfo(code
, NULL
, PCRE_INFO_NAMETABLE
, &nametable
)) != 0)
171 lastentry
= nametable
+ entrysize
* (top
- 1);
175 int mid
= (top
+ bot
) / 2;
176 pcre_uchar
*entry
= nametable
+ entrysize
*mid
;
177 int c
= STRCMP_UC_UC((pcre_uchar
*)stringname
,
178 (pcre_uchar
*)(entry
+ IMM2_SIZE
));
181 pcre_uchar
*first
= entry
;
182 pcre_uchar
*last
= entry
;
183 while (first
> nametable
)
185 if (STRCMP_UC_UC((pcre_uchar
*)stringname
,
186 (pcre_uchar
*)(first
- entrysize
+ IMM2_SIZE
)) != 0) break;
189 while (last
< lastentry
)
191 if (STRCMP_UC_UC((pcre_uchar
*)stringname
,
192 (pcre_uchar
*)(last
+ entrysize
+ IMM2_SIZE
)) != 0) break;
196 *firstptr
= (char *)first
;
197 *lastptr
= (char *)last
;
199 *firstptr
= (PCRE_UCHAR16
*)first
;
200 *lastptr
= (PCRE_UCHAR16
*)last
;
204 if (c
> 0) bot
= mid
+ 1; else top
= mid
;
207 return PCRE_ERROR_NOSUBSTRING
;
212 /*************************************************
213 * Find first set of multiple named strings *
214 *************************************************/
216 /* This function allows for duplicate names in the table of named substrings.
217 It returns the number of the first one that was set in a pattern match.
220 code the compiled regex
221 stringname the name of the capturing substring
222 ovector the vector of matched substrings
224 Returns: the number of the first that is set,
225 or the number of the last one if none are set,
226 or a negative number on error
231 get_first_set(const pcre
*code
, const char *stringname
, int *ovector
)
234 get_first_set(const pcre16
*code
, PCRE_SPTR16 stringname
, int *ovector
)
237 const REAL_PCRE
*re
= (const REAL_PCRE
*)code
;
243 PCRE_UCHAR16
*first
, *last
;
247 if ((re
->options
& PCRE_DUPNAMES
) == 0 && (re
->flags
& PCRE_JCHANGED
) == 0)
248 return pcre_get_stringnumber(code
, stringname
);
249 entrysize
= pcre_get_stringtable_entries(code
, stringname
, &first
, &last
);
251 if ((re
->options
& PCRE_DUPNAMES
) == 0 && (re
->flags
& PCRE_JCHANGED
) == 0)
252 return pcre16_get_stringnumber(code
, stringname
);
253 entrysize
= pcre16_get_stringtable_entries(code
, stringname
, &first
, &last
);
255 if (entrysize
<= 0) return entrysize
;
256 for (entry
= (pcre_uchar
*)first
; entry
<= (pcre_uchar
*)last
; entry
+= entrysize
)
258 int n
= GET2(entry
, 0);
259 if (ovector
[n
*2] >= 0) return n
;
261 return GET2(entry
, 0);
267 /*************************************************
268 * Copy captured string to given buffer *
269 *************************************************/
271 /* This function copies a single captured substring into a given buffer.
272 Note that we use memcpy() rather than strncpy() in case there are binary zeros
276 subject the subject string that was matched
277 ovector pointer to the offsets table
278 stringcount the number of substrings that were captured
279 (i.e. the yield of the pcre_exec call, unless
280 that was zero, in which case it should be 1/3
281 of the offset table size)
282 stringnumber the number of the required substring
283 buffer where to put the substring
284 size the size of the buffer
286 Returns: if successful:
287 the length of the copied string, not including the zero
288 that is put on the end; can be zero
290 PCRE_ERROR_NOMEMORY (-6) buffer too small
291 PCRE_ERROR_NOSUBSTRING (-7) no such captured substring
295 PCRE_EXP_DEFN
int PCRE_CALL_CONVENTION
296 pcre_copy_substring(const char *subject
, int *ovector
, int stringcount
,
297 int stringnumber
, char *buffer
, int size
)
299 PCRE_EXP_DEFN
int PCRE_CALL_CONVENTION
300 pcre16_copy_substring(PCRE_SPTR16 subject
, int *ovector
, int stringcount
,
301 int stringnumber
, PCRE_UCHAR16
*buffer
, int size
)
305 if (stringnumber
< 0 || stringnumber
>= stringcount
)
306 return PCRE_ERROR_NOSUBSTRING
;
308 yield
= ovector
[stringnumber
+1] - ovector
[stringnumber
];
309 if (size
< yield
+ 1) return PCRE_ERROR_NOMEMORY
;
310 memcpy(buffer
, subject
+ ovector
[stringnumber
], IN_UCHARS(yield
));
317 /*************************************************
318 * Copy named captured string to given buffer *
319 *************************************************/
321 /* This function copies a single captured substring into a given buffer,
322 identifying it by name. If the regex permits duplicate names, the first
323 substring that is set is chosen.
326 code the compiled regex
327 subject the subject string that was matched
328 ovector pointer to the offsets table
329 stringcount the number of substrings that were captured
330 (i.e. the yield of the pcre_exec call, unless
331 that was zero, in which case it should be 1/3
332 of the offset table size)
333 stringname the name of the required substring
334 buffer where to put the substring
335 size the size of the buffer
337 Returns: if successful:
338 the length of the copied string, not including the zero
339 that is put on the end; can be zero
341 PCRE_ERROR_NOMEMORY (-6) buffer too small
342 PCRE_ERROR_NOSUBSTRING (-7) no such captured substring
346 PCRE_EXP_DEFN
int PCRE_CALL_CONVENTION
347 pcre_copy_named_substring(const pcre
*code
, const char *subject
,
348 int *ovector
, int stringcount
, const char *stringname
,
349 char *buffer
, int size
)
351 PCRE_EXP_DEFN
int PCRE_CALL_CONVENTION
352 pcre16_copy_named_substring(const pcre16
*code
, PCRE_SPTR16 subject
,
353 int *ovector
, int stringcount
, PCRE_SPTR16 stringname
,
354 PCRE_UCHAR16
*buffer
, int size
)
357 int n
= get_first_set(code
, stringname
, ovector
);
358 if (n
<= 0) return n
;
360 return pcre_copy_substring(subject
, ovector
, stringcount
, n
, buffer
, size
);
362 return pcre16_copy_substring(subject
, ovector
, stringcount
, n
, buffer
, size
);
368 /*************************************************
369 * Copy all captured strings to new store *
370 *************************************************/
372 /* This function gets one chunk of store and builds a list of pointers and all
373 of the captured substrings in it. A NULL pointer is put on the end of the list.
376 subject the subject string that was matched
377 ovector pointer to the offsets table
378 stringcount the number of substrings that were captured
379 (i.e. the yield of the pcre_exec call, unless
380 that was zero, in which case it should be 1/3
381 of the offset table size)
382 listptr set to point to the list of pointers
384 Returns: if successful: 0
386 PCRE_ERROR_NOMEMORY (-6) failed to get store
390 PCRE_EXP_DEFN
int PCRE_CALL_CONVENTION
391 pcre_get_substring_list(const char *subject
, int *ovector
, int stringcount
,
392 const char ***listptr
)
394 PCRE_EXP_DEFN
int PCRE_CALL_CONVENTION
395 pcre16_get_substring_list(PCRE_SPTR16 subject
, int *ovector
, int stringcount
,
396 PCRE_SPTR16
**listptr
)
400 int size
= sizeof(pcre_uchar
*);
401 int double_count
= stringcount
* 2;
402 pcre_uchar
**stringlist
;
405 for (i
= 0; i
< double_count
; i
+= 2)
406 size
+= sizeof(pcre_uchar
*) + IN_UCHARS(ovector
[i
+1] - ovector
[i
] + 1);
408 stringlist
= (pcre_uchar
**)(PUBL(malloc
))(size
);
409 if (stringlist
== NULL
) return PCRE_ERROR_NOMEMORY
;
412 *listptr
= (const char **)stringlist
;
414 *listptr
= (PCRE_SPTR16
*)stringlist
;
416 p
= (pcre_uchar
*)(stringlist
+ stringcount
+ 1);
418 for (i
= 0; i
< double_count
; i
+= 2)
420 int len
= ovector
[i
+1] - ovector
[i
];
421 memcpy(p
, subject
+ ovector
[i
], IN_UCHARS(len
));
433 /*************************************************
434 * Free store obtained by get_substring_list *
435 *************************************************/
437 /* This function exists for the benefit of people calling PCRE from non-C
438 programs that can call its functions, but not free() or (PUBL(free))()
441 Argument: the result of a previous pcre_get_substring_list()
446 PCRE_EXP_DEFN
void PCRE_CALL_CONVENTION
447 pcre_free_substring_list(const char **pointer
)
449 PCRE_EXP_DEFN
void PCRE_CALL_CONVENTION
450 pcre16_free_substring_list(PCRE_SPTR16
*pointer
)
453 (PUBL(free
))((void *)pointer
);
458 /*************************************************
459 * Copy captured string to new store *
460 *************************************************/
462 /* This function copies a single captured substring into a piece of new
466 subject the subject string that was matched
467 ovector pointer to the offsets table
468 stringcount the number of substrings that were captured
469 (i.e. the yield of the pcre_exec call, unless
470 that was zero, in which case it should be 1/3
471 of the offset table size)
472 stringnumber the number of the required substring
473 stringptr where to put a pointer to the substring
475 Returns: if successful:
476 the length of the string, not including the zero that
477 is put on the end; can be zero
479 PCRE_ERROR_NOMEMORY (-6) failed to get store
480 PCRE_ERROR_NOSUBSTRING (-7) substring not present
484 PCRE_EXP_DEFN
int PCRE_CALL_CONVENTION
485 pcre_get_substring(const char *subject
, int *ovector
, int stringcount
,
486 int stringnumber
, const char **stringptr
)
488 PCRE_EXP_DEFN
int PCRE_CALL_CONVENTION
489 pcre16_get_substring(PCRE_SPTR16 subject
, int *ovector
, int stringcount
,
490 int stringnumber
, PCRE_SPTR16
*stringptr
)
494 pcre_uchar
*substring
;
495 if (stringnumber
< 0 || stringnumber
>= stringcount
)
496 return PCRE_ERROR_NOSUBSTRING
;
498 yield
= ovector
[stringnumber
+1] - ovector
[stringnumber
];
499 substring
= (pcre_uchar
*)(PUBL(malloc
))(IN_UCHARS(yield
+ 1));
500 if (substring
== NULL
) return PCRE_ERROR_NOMEMORY
;
501 memcpy(substring
, subject
+ ovector
[stringnumber
], IN_UCHARS(yield
));
502 substring
[yield
] = 0;
504 *stringptr
= (const char *)substring
;
506 *stringptr
= (PCRE_SPTR16
)substring
;
513 /*************************************************
514 * Copy named captured string to new store *
515 *************************************************/
517 /* This function copies a single captured substring, identified by name, into
518 new store. If the regex permits duplicate names, the first substring that is
522 code the compiled regex
523 subject the subject string that was matched
524 ovector pointer to the offsets table
525 stringcount the number of substrings that were captured
526 (i.e. the yield of the pcre_exec call, unless
527 that was zero, in which case it should be 1/3
528 of the offset table size)
529 stringname the name of the required substring
530 stringptr where to put the pointer
532 Returns: if successful:
533 the length of the copied string, not including the zero
534 that is put on the end; can be zero
536 PCRE_ERROR_NOMEMORY (-6) couldn't get memory
537 PCRE_ERROR_NOSUBSTRING (-7) no such captured substring
541 PCRE_EXP_DEFN
int PCRE_CALL_CONVENTION
542 pcre_get_named_substring(const pcre
*code
, const char *subject
,
543 int *ovector
, int stringcount
, const char *stringname
,
544 const char **stringptr
)
546 PCRE_EXP_DEFN
int PCRE_CALL_CONVENTION
547 pcre16_get_named_substring(const pcre16
*code
, PCRE_SPTR16 subject
,
548 int *ovector
, int stringcount
, PCRE_SPTR16 stringname
,
549 PCRE_SPTR16
*stringptr
)
552 int n
= get_first_set(code
, stringname
, ovector
);
553 if (n
<= 0) return n
;
555 return pcre_get_substring(subject
, ovector
, stringcount
, n
, stringptr
);
557 return pcre16_get_substring(subject
, ovector
, stringcount
, n
, stringptr
);
564 /*************************************************
565 * Free store obtained by get_substring *
566 *************************************************/
568 /* This function exists for the benefit of people calling PCRE from non-C
569 programs that can call its functions, but not free() or (PUBL(free))()
572 Argument: the result of a previous pcre_get_substring()
577 PCRE_EXP_DEFN
void PCRE_CALL_CONVENTION
578 pcre_free_substring(const char *pointer
)
580 PCRE_EXP_DEFN
void PCRE_CALL_CONVENTION
581 pcre16_free_substring(PCRE_SPTR16 pointer
)
584 (PUBL(free
))((void *)pointer
);
587 /* End of pcre_get.c */