1 // Copyright 2003-2009 Google Inc. All rights reserved.
2 // Use of this source code is governed by a BSD-style
3 // license that can be found in the LICENSE file.
5 // This is a variant of PCRE's pcrecpp.cc, originally written at Google.
6 // The main changes are the addition of the HitLimit method and
7 // compilation as PCRE in namespace re2.
#include <string.h>

#include "util/util.h"
#include "util/flags.h"
#include "util/pcre.h"
15 #define strtoll _strtoi64
16 #define strtoull _strtoui64
19 #define PCREPORT(level) LOG(level)
21 // Default PCRE limits.
22 // Defaults chosen to allow a plausible amount of CPU and
23 // not exceed main thread stacks. Note that other threads
24 // often have smaller stacks, and therefore tightening
25 // regexp_stack_limit may frequently be necessary.
26 DEFINE_int32(regexp_stack_limit
, 256<<10, "default PCRE stack limit (bytes)");
27 DEFINE_int32(regexp_match_limit
, 1000000,
28 "default PCRE match limit (function calls)");
// Upper bound on the number of capture arguments a caller can pass.
static const int kMaxArgs = 16;

// Length of the offsets vector handed to the matcher: three ints for the
// whole match plus three per possible argument (results + PCRE workspace).
static const int kVecSize = 3 * (kMaxArgs + 1);

// Rough size, in bytes, of one recursive invocation of PCRE's internal
// "match()" frame.  The real figure depends on compiler and architecture,
// so this is only a conservative estimate.  To measure it exactly, run
// regexp_unittest with --regexp_stack_limit=0 under a debugger and
// inspect the frames at the crash.  The exact frame size was 656 in
// production on 2008/02/03.
static const int kPCREFrameSize = 700;
45 // Special name for missing C++ arguments.
46 PCRE::Arg
PCRE::no_more_args((void*)NULL
);
48 const PCRE::PartialMatchFunctor
PCRE::PartialMatch
= { };
49 const PCRE::FullMatchFunctor
PCRE::FullMatch
= { } ;
50 const PCRE::ConsumeFunctor
PCRE::Consume
= { };
51 const PCRE::FindAndConsumeFunctor
PCRE::FindAndConsume
= { };
53 // If a regular expression has no error, its error_ field points here
54 static const string empty_string
;
// Common initializer called by each of the PCRE constructors.
// Stores the match/stack limits and the error-reporting preference and
// points error_ at the shared empty_string sentinel (meaning "no error").
// Option bits outside EnabledCompileOptions | EnabledExecOptions are
// rejected with the error text "illegal regexp option".  The pattern is
// compiled UNANCHORED into re_partial_; re_full_ (ANCHOR_BOTH) is compiled
// only when re_partial_ is non-NULL.
// NOTE(review): some statements of this definition are missing from this
// listing (for instance the expression that the "<<" on the error path
// continues, and the else/closing braces) -- confirm against the full file.
56 void PCRE::Init(const char* pattern
, Option options
, int match_limit
,
57 int stack_limit
, bool report_errors
) {
60 match_limit_
= match_limit
;
61 stack_limit_
= stack_limit
;
63 error_
= &empty_string
;
64 report_errors_
= report_errors
;
// Any bit that is neither a compile option nor an exec option is illegal.
68 if (options
& ~(EnabledCompileOptions
| EnabledExecOptions
)) {
69 error_
= new string("illegal regexp option");
71 << "Error compiling '" << pattern
<< "': illegal regexp option";
73 re_partial_
= Compile(UNANCHORED
);
// The fully anchored form is attempted only after the partial form compiled.
74 if (re_partial_
!= NULL
) {
75 re_full_
= Compile(ANCHOR_BOTH
);
80 PCRE::PCRE(const char* pattern
) {
81 Init(pattern
, None
, 0, 0, true);
83 PCRE::PCRE(const char* pattern
, Option option
) {
84 Init(pattern
, option
, 0, 0, true);
86 PCRE::PCRE(const string
& pattern
) {
87 Init(pattern
.c_str(), None
, 0, 0, true);
89 PCRE::PCRE(const string
& pattern
, Option option
) {
90 Init(pattern
.c_str(), option
, 0, 0, true);
92 PCRE::PCRE(const string
& pattern
, const PCRE_Options
& re_option
) {
93 Init(pattern
.c_str(), re_option
.option(), re_option
.match_limit(),
94 re_option
.stack_limit(), re_option
.report_errors());
97 PCRE::PCRE(const char *pattern
, const PCRE_Options
& re_option
) {
98 Init(pattern
, re_option
.option(), re_option
.match_limit(),
99 re_option
.stack_limit(), re_option
.report_errors());
// Releases the two compiled regexps and the error string.
// NOTE(review): the enclosing function header is missing from this listing;
// presumably this is the body of PCRE::~PCRE() -- confirm.
// error_ is deleted only when it is not the shared empty_string sentinel,
// since only then does it point at a string allocated with new.
103 if (re_full_
!= NULL
) pcre_free(re_full_
);
104 if (re_partial_
!= NULL
) pcre_free(re_partial_
);
105 if (error_
!= &empty_string
) delete error_
;
// Compiles pattern_ with pcre_compile() for the requested anchoring and
// returns the compiled regexp.  Only the option bits contained in
// EnabledCompileOptions are passed to pcre_compile().
// On the error path shown at the end, the pcre error text is stored in
// error_ (unless an earlier error is already recorded there) and logged.
// NOTE(review): the declarations of re/error/eoffset, the statements that
// finish building "wrapped", the failure test and the return are missing
// from this listing -- confirm against the full file.
108 pcre
* PCRE::Compile(Anchor anchor
) {
109 // Special treatment for anchoring. This is needed because at
110 // runtime pcre only provides an option for anchoring at the
111 // beginning of a string.
113 // There are three types of anchoring we want:
114 // UNANCHORED Compile the original pattern, and use
115 // a pcre unanchored match.
116 // ANCHOR_START Compile the original pattern, and use
117 // a pcre anchored match.
118 // ANCHOR_BOTH Tack a "\z" to the end of the original pattern
119 // and use a pcre anchored match.
// UNANCHORED and ANCHOR_START both compile the pattern unchanged.
124 if (anchor
!= ANCHOR_BOTH
) {
125 re
= pcre_compile(pattern_
.c_str(),
126 (options_
& EnabledCompileOptions
),
127 &error
, &eoffset
, NULL
);
129 // Tack a '\z' at the end of PCRE. Parenthesize it first so that
130 // the '\z' applies to all top-level alternatives in the regexp.
131 string wrapped
= "(?:"; // A non-counting grouping operator
134 re
= pcre_compile(wrapped
.c_str(),
135 (options_
& EnabledCompileOptions
),
136 &error
, &eoffset
, NULL
);
// Keep the first recorded error: error_ is replaced only while it still
// points at the empty_string sentinel.
139 if (error_
== &empty_string
) error_
= new string(error
);
140 PCREPORT(ERROR
) << "Error compiling '" << pattern_
<< "': " << error
;
145 /***** Convenience interfaces *****/
// FullMatch entry point.  Gathers the leading arguments that are not the
// no_more_args sentinel (detected by address comparison) into args[],
// counting them in n, and then calls DoMatchImpl with ANCHOR_BOTH.
// NOTE(review): parameters between "text" and "a15", the declarations of
// n/consumed/vec and the "done" label are missing from this listing.
147 bool PCRE::FullMatchFunctor::operator ()(const StringPiece
& text
,
164 const Arg
& a15
) const {
165 const Arg
* args
[kMaxArgs
];
// Stop collecting at the first argument that is the sentinel object.
167 if (&a0
== &no_more_args
) goto done
; args
[n
++] = &a0
;
168 if (&a1
== &no_more_args
) goto done
; args
[n
++] = &a1
;
169 if (&a2
== &no_more_args
) goto done
; args
[n
++] = &a2
;
170 if (&a3
== &no_more_args
) goto done
; args
[n
++] = &a3
;
171 if (&a4
== &no_more_args
) goto done
; args
[n
++] = &a4
;
172 if (&a5
== &no_more_args
) goto done
; args
[n
++] = &a5
;
173 if (&a6
== &no_more_args
) goto done
; args
[n
++] = &a6
;
174 if (&a7
== &no_more_args
) goto done
; args
[n
++] = &a7
;
175 if (&a8
== &no_more_args
) goto done
; args
[n
++] = &a8
;
176 if (&a9
== &no_more_args
) goto done
; args
[n
++] = &a9
;
177 if (&a10
== &no_more_args
) goto done
; args
[n
++] = &a10
;
178 if (&a11
== &no_more_args
) goto done
; args
[n
++] = &a11
;
179 if (&a12
== &no_more_args
) goto done
; args
[n
++] = &a12
;
180 if (&a13
== &no_more_args
) goto done
; args
[n
++] = &a13
;
181 if (&a14
== &no_more_args
) goto done
; args
[n
++] = &a14
;
182 if (&a15
== &no_more_args
) goto done
; args
[n
++] = &a15
;
// Match with anchoring at both ends, parsing captures into the n args.
187 return re
.DoMatchImpl(text
, ANCHOR_BOTH
, &consumed
, args
, n
, vec
, kVecSize
);
// PartialMatch entry point.  Gathers the leading arguments that are not the
// no_more_args sentinel (detected by address comparison) into args[],
// counting them in n, and then calls DoMatchImpl with UNANCHORED.
// NOTE(review): parameters between "text" and "a15", the declarations of
// n/consumed/vec and the "done" label are missing from this listing.
190 bool PCRE::PartialMatchFunctor::operator ()(const StringPiece
& text
,
207 const Arg
& a15
) const {
208 const Arg
* args
[kMaxArgs
];
// Stop collecting at the first argument that is the sentinel object.
210 if (&a0
== &no_more_args
) goto done
; args
[n
++] = &a0
;
211 if (&a1
== &no_more_args
) goto done
; args
[n
++] = &a1
;
212 if (&a2
== &no_more_args
) goto done
; args
[n
++] = &a2
;
213 if (&a3
== &no_more_args
) goto done
; args
[n
++] = &a3
;
214 if (&a4
== &no_more_args
) goto done
; args
[n
++] = &a4
;
215 if (&a5
== &no_more_args
) goto done
; args
[n
++] = &a5
;
216 if (&a6
== &no_more_args
) goto done
; args
[n
++] = &a6
;
217 if (&a7
== &no_more_args
) goto done
; args
[n
++] = &a7
;
218 if (&a8
== &no_more_args
) goto done
; args
[n
++] = &a8
;
219 if (&a9
== &no_more_args
) goto done
; args
[n
++] = &a9
;
220 if (&a10
== &no_more_args
) goto done
; args
[n
++] = &a10
;
221 if (&a11
== &no_more_args
) goto done
; args
[n
++] = &a11
;
222 if (&a12
== &no_more_args
) goto done
; args
[n
++] = &a12
;
223 if (&a13
== &no_more_args
) goto done
; args
[n
++] = &a13
;
224 if (&a14
== &no_more_args
) goto done
; args
[n
++] = &a14
;
225 if (&a15
== &no_more_args
) goto done
; args
[n
++] = &a15
;
// Match anywhere in the text, parsing captures into the n args.
230 return re
.DoMatchImpl(text
, UNANCHORED
, &consumed
, args
, n
, vec
, kVecSize
);
// Consume entry point.  Gathers the leading arguments that are not the
// no_more_args sentinel (detected by address comparison) into args[],
// counting them in n, then matches *input with ANCHOR_START.  When the
// match succeeds, the consumed prefix is removed from *input.
// NOTE(review): parameters between "input" and "a15", the declarations of
// n/consumed/vec, the "done" label and the return statements are missing
// from this listing.
233 bool PCRE::ConsumeFunctor::operator ()(StringPiece
* input
,
250 const Arg
& a15
) const {
251 const Arg
* args
[kMaxArgs
];
// Stop collecting at the first argument that is the sentinel object.
253 if (&a0
== &no_more_args
) goto done
; args
[n
++] = &a0
;
254 if (&a1
== &no_more_args
) goto done
; args
[n
++] = &a1
;
255 if (&a2
== &no_more_args
) goto done
; args
[n
++] = &a2
;
256 if (&a3
== &no_more_args
) goto done
; args
[n
++] = &a3
;
257 if (&a4
== &no_more_args
) goto done
; args
[n
++] = &a4
;
258 if (&a5
== &no_more_args
) goto done
; args
[n
++] = &a5
;
259 if (&a6
== &no_more_args
) goto done
; args
[n
++] = &a6
;
260 if (&a7
== &no_more_args
) goto done
; args
[n
++] = &a7
;
261 if (&a8
== &no_more_args
) goto done
; args
[n
++] = &a8
;
262 if (&a9
== &no_more_args
) goto done
; args
[n
++] = &a9
;
263 if (&a10
== &no_more_args
) goto done
; args
[n
++] = &a10
;
264 if (&a11
== &no_more_args
) goto done
; args
[n
++] = &a11
;
265 if (&a12
== &no_more_args
) goto done
; args
[n
++] = &a12
;
266 if (&a13
== &no_more_args
) goto done
; args
[n
++] = &a13
;
267 if (&a14
== &no_more_args
) goto done
; args
[n
++] = &a14
;
268 if (&a15
== &no_more_args
) goto done
; args
[n
++] = &a15
;
// The match must begin at the start of *input.
273 if (pattern
.DoMatchImpl(*input
, ANCHOR_START
, &consumed
,
274 args
, n
, vec
, kVecSize
)) {
// Advance the caller's view past the text that was consumed.
275 input
->remove_prefix(consumed
);
// FindAndConsume entry point.  Gathers the leading arguments that are not
// the no_more_args sentinel (detected by address comparison) into args[],
// counting them in n, then matches *input with UNANCHORED.  When the match
// succeeds, the consumed prefix is removed from *input.
// NOTE(review): parameters between "input" and "a15", the declarations of
// n/consumed/vec, the "done" label and the return statements are missing
// from this listing.
282 bool PCRE::FindAndConsumeFunctor::operator ()(StringPiece
* input
,
299 const Arg
& a15
) const {
300 const Arg
* args
[kMaxArgs
];
// Stop collecting at the first argument that is the sentinel object.
302 if (&a0
== &no_more_args
) goto done
; args
[n
++] = &a0
;
303 if (&a1
== &no_more_args
) goto done
; args
[n
++] = &a1
;
304 if (&a2
== &no_more_args
) goto done
; args
[n
++] = &a2
;
305 if (&a3
== &no_more_args
) goto done
; args
[n
++] = &a3
;
306 if (&a4
== &no_more_args
) goto done
; args
[n
++] = &a4
;
307 if (&a5
== &no_more_args
) goto done
; args
[n
++] = &a5
;
308 if (&a6
== &no_more_args
) goto done
; args
[n
++] = &a6
;
309 if (&a7
== &no_more_args
) goto done
; args
[n
++] = &a7
;
310 if (&a8
== &no_more_args
) goto done
; args
[n
++] = &a8
;
311 if (&a9
== &no_more_args
) goto done
; args
[n
++] = &a9
;
312 if (&a10
== &no_more_args
) goto done
; args
[n
++] = &a10
;
313 if (&a11
== &no_more_args
) goto done
; args
[n
++] = &a11
;
314 if (&a12
== &no_more_args
) goto done
; args
[n
++] = &a12
;
315 if (&a13
== &no_more_args
) goto done
; args
[n
++] = &a13
;
316 if (&a14
== &no_more_args
) goto done
; args
[n
++] = &a14
;
317 if (&a15
== &no_more_args
) goto done
; args
[n
++] = &a15
;
// The match may begin anywhere in *input.
322 if (pattern
.DoMatchImpl(*input
, UNANCHORED
, &consumed
,
323 args
, n
, vec
, kVecSize
)) {
// Advance the caller's view past the text that was consumed.
324 input
->remove_prefix(consumed
);
// Replaces, inside *str, the span found by an unanchored match starting at
// offset 0 with the expansion of "rewrite".  The expansion is built in a
// temporary string s by pattern.Rewrite() and then spliced over the matched
// range [vec[0], vec[1]).
// NOTE(review): the "pattern" parameter, the declarations of vec and s, and
// the early-exit/return statements are missing from this listing.
331 bool PCRE::Replace(string
*str
,
333 const StringPiece
& rewrite
) {
335 int matches
= pattern
.TryMatch(*str
, 0, UNANCHORED
, true, vec
, kVecSize
);
340 if (!pattern
.Rewrite(&s
, rewrite
, *str
, vec
, matches
))
// vec[0]/vec[1] are the start and end offsets of the whole match.
345 str
->replace(vec
[0], vec
[1] - vec
[0], s
);
// Applies "rewrite" to successive matches of the pattern in *str, building
// the result in "out": text between matches is copied through unchanged and
// each match is expanded via pattern.Rewrite().  After an empty match, the
// next attempt is anchored at the same position and refuses empty matches;
// the visible code then copies the character at "start" into "out".
// NOTE(review): the "pattern" parameter, the declarations of out/start/
// matches/vec, the updates of "start", the loop exits and the return value
// are missing from this listing (presumably a replacement count -- confirm).
349 int PCRE::GlobalReplace(string
*str
,
351 const StringPiece
& rewrite
) {
356 bool last_match_was_empty_string
= false;
358 for (; start
<= str
->length();) {
359 // If the previous match was for the empty string, we shouldn't
360 // just match again: we'll match in the same way and get an
361 // infinite loop. Instead, we do the match in a special way:
362 // anchored -- to force another try at the same position --
363 // and with a flag saying that this time, ignore empty matches.
364 // If this special match returns, that means there's a non-empty
365 // match at this position as well, and we can continue. If not,
366 // we do what perl does, and just advance by one.
367 // Notice that perl prints '@@@' for this;
368 // perl -le '$_ = "aa"; s/b*|aa/@/g; print'
370 if (last_match_was_empty_string
) {
371 matches
= pattern
.TryMatch(*str
, start
, ANCHOR_START
, false,
374 if (start
< str
->length())
375 out
.push_back((*str
)[start
]);
377 last_match_was_empty_string
= false;
381 matches
= pattern
.TryMatch(*str
, start
, UNANCHORED
, true, vec
, kVecSize
);
385 int matchstart
= vec
[0], matchend
= vec
[1];
386 assert(matchstart
>= start
);
387 assert(matchend
>= matchstart
);
// Copy the unmatched text before this match, then the rewritten match.
389 out
.append(*str
, start
, matchstart
- start
);
390 pattern
.Rewrite(&out
, rewrite
, *str
, vec
, matches
);
393 last_match_was_empty_string
= (matchstart
== matchend
);
// Copy whatever remains after the last match.
399 if (start
< str
->length())
400 out
.append(*str
, start
, str
->length() - start
);
// Runs an unanchored match of the pattern against "text" from offset 0 and,
// on the path shown, returns the result of expanding "rewrite" into *out
// with the captured groups.
// NOTE(review): the "pattern" and "out" parameters, the declaration of vec
// and the handling of a failed match are missing from this listing.
405 bool PCRE::Extract(const StringPiece
&text
,
407 const StringPiece
&rewrite
,
410 int matches
= pattern
.TryMatch(text
, 0, UNANCHORED
, true, vec
, kVecSize
);
414 return pattern
.Rewrite(out
, rewrite
, text
, vec
, matches
);
// Builds an escaped copy of "unquoted" for use as a literal inside a regexp.
// Space for twice the input size is reserved up front.  A byte is treated as
// needing an escape when it is not in [A-Za-z0-9_] and does not have its
// high bit set; NUL bytes get special handling.  Every input byte is
// appended to the result.
// NOTE(review): the declaration of "result", the statements that emit the
// escape sequences and the return are missing from this listing.
417 string
PCRE::QuoteMeta(const StringPiece
& unquoted
) {
419 result
.reserve(unquoted
.size() << 1);
421 // Escape any ascii character not in [A-Za-z_0-9].
423 // Note that it's legal to escape a character even if it has no
424 // special meaning in a regular expression -- so this function does
425 // that. (This also makes it identical to the perl function of the
426 // same name except for the null-character special case;
427 // see `perldoc -f quotemeta`.)
428 for (int ii
= 0; ii
< unquoted
.length(); ++ii
) {
429 // Note that using 'isalnum' here raises the benchmark time from
431 if ((unquoted
[ii
] < 'a' || unquoted
[ii
] > 'z') &&
432 (unquoted
[ii
] < 'A' || unquoted
[ii
] > 'Z') &&
433 (unquoted
[ii
] < '0' || unquoted
[ii
] > '9') &&
434 unquoted
[ii
] != '_' &&
435 // If this is the part of a UTF8 or Latin1 character, we need
436 // to copy this byte without escaping. Experimentally this is
437 // what works correctly with the regexp library.
438 !(unquoted
[ii
] & 128)) {
439 if (unquoted
[ii
] == '\0') { // Special handling for null chars.
440 // Can't use "\\0" since the next character might be a digit.
// The byte itself is appended after any escape prefix.
446 result
+= unquoted
[ii
];
452 /***** Actual matching and rewriting code *****/
// Reports the "hit limit" state of this PCRE.
// NOTE(review): the body is missing from this listing; presumably it returns
// the flag that TryMatch sets when a match or recursion limit is exceeded --
// confirm against the full file.
454 bool PCRE::HitLimit() {
// Resets the "hit limit" state of this PCRE.
// NOTE(review): the body is missing from this listing; presumably it clears
// the flag reported by HitLimit() -- confirm against the full file.
458 void PCRE::ClearHitLimit() {
// Runs pcre_exec() for one match attempt and interprets its return code.
// The compiled regexp used is re_full_ for ANCHOR_BOTH and re_partial_
// otherwise.  A non-positive per-object match/stack limit is replaced by the
// corresponding FLAGS_regexp_* default; positive limits are passed to PCRE
// through a pcre_extra block, the stack limit being converted to a recursion
// depth by dividing by kPCREFrameSize.  PCRE_ANCHORED is added for any
// anchoring other than UNANCHORED.
// NOTE(review): the remaining parameters, the declaration of "options", the
// condition guarding PCRE_NOTEMPTY, most pcre_exec arguments, the switch
// header and all return statements are missing from this listing.
462 int PCRE::TryMatch(const StringPiece
& text
,
468 pcre
* re
= (anchor
== ANCHOR_BOTH
) ? re_full_
: re_partial_
;
470 PCREPORT(ERROR
) << "Matching against invalid re: " << *error_
;
// Fall back to the flag defaults when no per-object limit was given.
474 int match_limit
= match_limit_
;
475 if (match_limit
<= 0) {
476 match_limit
= FLAGS_regexp_match_limit
;
479 int stack_limit
= stack_limit_
;
480 if (stack_limit
<= 0) {
481 stack_limit
= FLAGS_regexp_stack_limit
;
484 pcre_extra extra
= { 0 };
485 if (match_limit
> 0) {
486 extra
.flags
|= PCRE_EXTRA_MATCH_LIMIT
;
487 extra
.match_limit
= match_limit
;
489 if (stack_limit
> 0) {
490 extra
.flags
|= PCRE_EXTRA_MATCH_LIMIT_RECURSION
;
// Bytes of stack are converted to a recursion depth using the estimated
// per-frame size.
491 extra
.match_limit_recursion
= stack_limit
/ kPCREFrameSize
;
495 if (anchor
!= UNANCHORED
)
496 options
|= PCRE_ANCHORED
;
498 options
|= PCRE_NOTEMPTY
;
500 int rc
= pcre_exec(re
, // The regular expression object
// An empty literal is substituted when the text has a NULL data pointer.
502 (text
.data() == NULL
) ? "" : text
.data(),
511 // pcre_exec() returns 0 as a special case when the number of
512 // capturing subpatterns exceeds the size of the vector.
513 // When this happens, there is a match and the output vector
514 // is filled, but we miss out on the positions of the extra subpatterns.
518 case PCRE_ERROR_NOMATCH
:
520 case PCRE_ERROR_MATCHLIMIT
:
521 // Writing to hit_limit is not safe if multiple threads
522 // are using the PCRE, but the flag is only intended
523 // for use by unit tests anyway, so we let it go.
525 PCREPORT(WARNING
) << "Exceeded match limit of " << match_limit
526 << " when matching '" << pattern_
<< "'"
527 << " against text that is " << text
.size() << " bytes.";
529 case PCRE_ERROR_RECURSIONLIMIT
:
530 // See comment about hit_limit above.
532 PCREPORT(WARNING
) << "Exceeded stack limit of " << stack_limit
533 << " when matching '" << pattern_
<< "'"
534 << " against text that is " << text
.size() << " bytes.";
537 // There are other return codes from pcre.h :
538 // PCRE_ERROR_NULL (-2)
539 // PCRE_ERROR_BADOPTION (-3)
540 // PCRE_ERROR_BADMAGIC (-4)
541 // PCRE_ERROR_UNKNOWN_NODE (-5)
542 // PCRE_ERROR_NOMEMORY (-6)
543 // PCRE_ERROR_NOSUBSTRING (-7)
545 PCREPORT(ERROR
) << "Unexpected return code: " << rc
546 << " when matching '" << pattern_
<< "'"
550 << ", vecsize=" << vecsize
;
// Core of the matching interfaces: runs TryMatch on "text" from offset 0
// with the given anchoring and then hands each captured substring to the
// corresponding args[i]->Parse().  Capture i is read from vec[2*(i+1)]
// (start) and vec[2*(i+1)+1] (limit).  The caller-supplied vector must hold
// at least (1 + n) * 3 ints.
// NOTE(review): the remaining parameters, the handling of a failed match,
// the assignment to *consumed and all return statements are missing from
// this listing.
558 bool PCRE::DoMatchImpl(const StringPiece
& text
,
561 const Arg
* const* args
,
565 assert((1 + n
) * 3 <= vecsize
); // results + PCRE workspace
566 int matches
= TryMatch(text
, 0, anchor
, true, vec
, vecsize
);
567 assert(matches
>= 0); // TryMatch never returns negatives
573 if (n
== 0 || args
== NULL
) {
574 // We are not interested in results
577 if (NumberOfCapturingGroups() < n
) {
578 // PCRE has fewer capturing groups than number of arg pointers passed in
582 // If we got here, we must have matched the whole pattern.
583 // We do not need (can not do) any more checks on the value of 'matches' here
584 // -- see the comment for TryMatch.
585 for (int i
= 0; i
< n
; i
++) {
// Offsets for group i+1; group 0 is the whole match.
586 const int start
= vec
[2*(i
+1)];
587 const int limit
= vec
[2*(i
+1)+1];
588 if (!args
[i
]->Parse(text
.data() + start
, limit
-start
)) {
589 // TODO: Should we indicate what the error was?
// Variant of the matching entry point for an arbitrary number of arguments:
// allocates an offsets vector of (1 + n) * 3 ints with new[] and delegates
// to DoMatchImpl.
// NOTE(review): the remaining parameters, the release of "vec" and the
// return statement are missing from this listing -- confirm that vec is
// freed with delete[] on every path.
597 bool PCRE::DoMatch(const StringPiece
& text
,
600 const Arg
* const args
[],
603 size_t const vecsize
= (1 + n
) * 3; // results + PCRE workspace
605 int *vec
= new int[vecsize
];
606 bool b
= DoMatchImpl(text
, anchor
, consumed
, args
, n
, vec
, vecsize
);
// Appends to *out the expansion of "rewrite", substituting captured groups
// taken from "text" using the offsets in vec (group n spans
// [vec[2*n], vec[2*n+1])).  A backslash followed by a backslash appends a
// single backslash.  A group number that the regexp has but that did not
// participate in the match expands to nothing; other bad requests are
// logged as errors.
// NOTE(review): the loop header's remainder, the parsing of the character
// after the backslash, the conditions around the group lookup and all
// return statements are missing from this listing.
// NOTE(review): rewrite.data() is streamed to the log as a C string below;
// presumably that relies on the StringPiece being NUL-terminated -- verify.
611 bool PCRE::Rewrite(string
*out
, const StringPiece
&rewrite
,
612 const StringPiece
&text
, int *vec
, int veclen
) const {
613 int number_of_capturing_groups
= NumberOfCapturingGroups();
614 for (const char *s
= rewrite
.data(), *end
= s
+ rewrite
.size();
622 if (n
<= number_of_capturing_groups
) {
623 // unmatched optional capturing group. treat
624 // its value as empty string; i.e., nothing to append.
626 PCREPORT(ERROR
) << "requested group " << n
627 << " in regexp " << rewrite
.data();
631 int start
= vec
[2 * n
];
633 out
->append(text
.data() + start
, vec
[2 * n
+ 1] - start
);
634 } else if (c
== '\\') {
635 out
->push_back('\\');
637 PCREPORT(ERROR
) << "invalid rewrite pattern: " << rewrite
.data();
// Validates a rewrite string against this regexp and describes any problem
// in *error.  The visible diagnostics cover a backslash at the very end, a
// backslash followed by something other than a digit or a backslash, and a
// reference to a group number larger than NumberOfCapturingGroups().
// NOTE(review): the loop body that scans the string, the computation of
// max_token and all return statements are missing from this listing.
647 bool PCRE::CheckRewriteString(const StringPiece
& rewrite
, string
* error
) const {
649 for (const char *s
= rewrite
.data(), *end
= s
+ rewrite
.size();
656 *error
= "Rewrite schema error: '\\' not allowed at end.";
664 *error
= "Rewrite schema error: "
665 "'\\' must be followed by a digit or '\\'.";
// The highest group referenced must exist in the regexp.
674 if (max_token
> NumberOfCapturingGroups()) {
675 SStringPrintf(error
, "Rewrite schema requests %d matches, "
676 "but the regexp only has %d parenthesized subexpressions.",
677 max_token
, NumberOfCapturingGroups());
684 // Return the number of capturing subpatterns, or -1 if the
685 // regexp wasn't valid on construction.
// The count is queried from re_partial_ with pcre_fullinfo() and
// PCRE_INFO_CAPTURECOUNT; the call is wrapped in CHECK.
// NOTE(review): the result variable, the rest of the CHECK expression and
// the final return are missing from this listing.
686 int PCRE::NumberOfCapturingGroups() const {
687 if (re_partial_
== NULL
) return -1;
690 CHECK(pcre_fullinfo(re_partial_
, // The regular expression object
691 NULL
, // We did not study the pattern
692 PCRE_INFO_CAPTURECOUNT
,
698 /***** Parsers for various types *****/
700 bool PCRE::Arg::parse_null(const char* str
, int n
, void* dest
) {
701 // We fail if somebody asked us to store into a non-NULL void* pointer
702 return (dest
== NULL
);
705 bool PCRE::Arg::parse_string(const char* str
, int n
, void* dest
) {
706 if (dest
== NULL
) return true;
707 reinterpret_cast<string
*>(dest
)->assign(str
, n
);
711 bool PCRE::Arg::parse_stringpiece(const char* str
, int n
, void* dest
) {
712 if (dest
== NULL
) return true;
713 reinterpret_cast<StringPiece
*>(dest
)->set(str
, n
);
717 bool PCRE::Arg::parse_char(const char* str
, int n
, void* dest
) {
718 if (n
!= 1) return false;
719 if (dest
== NULL
) return true;
720 *(reinterpret_cast<char*>(dest
)) = str
[0];
724 bool PCRE::Arg::parse_uchar(const char* str
, int n
, void* dest
) {
725 if (n
!= 1) return false;
726 if (dest
== NULL
) return true;
727 *(reinterpret_cast<unsigned char*>(dest
)) = str
[0];
// Largest number spec that we are willing to parse
static const int kMaxNumberLength = 32;

// REQUIRES "buf" must have length at least kMaxNumberLength+1
// REQUIRES "n > 0"
// Copies "str" into "buf" and null-terminates if necessary.
// Returns one of:
//      a. "str" if no termination is needed
//      b. "buf" if the string was copied and null-terminated
//      c. "" if the input was invalid and has no hope of being parsed
//
// Note that the byte at str[n] (one past the number) is inspected, so it
// must be readable.
static const char* TerminateNumber(char* buf, const char* str, int n) {
  // The <ctype.h> classifiers are only defined for values representable as
  // unsigned char (or EOF).  Plain char may be signed, so bytes >= 0x80
  // must be converted before they are classified.
  if ((n > 0) && isspace(static_cast<unsigned char>(*str))) {
    // We are less forgiving than the strtoxxx() routines and do not
    // allow leading spaces.
    return "";
  }

  // See if the character right after the input text may potentially
  // look like a digit.  If it does, the strtoxxx() routines would run
  // past the end of the number, so a terminated copy is needed.
  const unsigned char next = static_cast<unsigned char>(str[n]);
  const bool next_looks_like_digit =
      isdigit(next) ||
      ((next >= 'a') && (next <= 'f')) ||
      ((next >= 'A') && (next <= 'F'));
  if (next_looks_like_digit) {
    if (n > kMaxNumberLength) return "";  // Input too big to be a valid number
    memcpy(buf, str, n);
    buf[n] = '\0';
    return buf;
  }

  // We can parse right out of the supplied string, so return it.
  return str;
}
// Parses the n bytes at str as a long in the given radix using strtol().
// Fails for empty input, when strtol() does not stop exactly at str + n
// (leftover junk), or when errno is set.  A NULL dest validates without
// storing.  TerminateNumber() supplies a NUL-terminated copy in buf when
// the text cannot be parsed in place.
// NOTE(review): the remaining parameters, the declaration of "end", the
// presumably present reset of errno before the call, and the final return
// are missing from this listing.
763 bool PCRE::Arg::parse_long_radix(const char* str
,
767 if (n
== 0) return false;
768 char buf
[kMaxNumberLength
+1];
769 str
= TerminateNumber(buf
, str
, n
);
772 long r
= strtol(str
, &end
, radix
);
773 if (end
!= str
+ n
) return false; // Leftover junk
774 if (errno
) return false;
775 if (dest
== NULL
) return true;
776 *(reinterpret_cast<long*>(dest
)) = r
;
// Parses the n bytes at str as an unsigned long in the given radix using
// strtoul().  Fails for empty input, when strtoul() does not stop exactly at
// str + n (leftover junk), or when errno is set.  A NULL dest validates
// without storing.  Per the comment below, negative numbers are treated as
// errors here even though strtoul() would accept them.
// NOTE(review): the remaining parameters, the statement that rejects
// negative input, the declaration of "end", the presumably present reset of
// errno, and the final return are missing from this listing.
780 bool PCRE::Arg::parse_ulong_radix(const char* str
,
784 if (n
== 0) return false;
785 char buf
[kMaxNumberLength
+1];
786 str
= TerminateNumber(buf
, str
, n
);
788 // strtoul() will silently accept negative numbers and parse
789 // them. This module is more strict and treats them as errors.
795 unsigned long r
= strtoul(str
, &end
, radix
);
796 if (end
!= str
+ n
) return false; // Leftover junk
797 if (errno
) return false;
798 if (dest
== NULL
) return true;
799 *(reinterpret_cast<unsigned long*>(dest
)) = r
;
// Parses a short by delegating to parse_long_radix() and then rejecting
// values that do not survive a round trip through short.  A NULL dest
// validates without storing.
// NOTE(review): the remaining parameters, the declaration of r and the
// final return are missing from this listing.
803 bool PCRE::Arg::parse_short_radix(const char* str
,
808 if (!parse_long_radix(str
, n
, &r
, radix
)) return false; // Could not parse
809 if ((short)r
!= r
) return false; // Out of range
810 if (dest
== NULL
) return true;
811 *(reinterpret_cast<short*>(dest
)) = r
;
// Parses an unsigned short by delegating to parse_ulong_radix() and then
// rejecting values that do not survive a round trip through ushort.  A NULL
// dest validates without storing.
// NOTE(review): the remaining parameters, the declaration of r and the
// final return are missing from this listing.
815 bool PCRE::Arg::parse_ushort_radix(const char* str
,
820 if (!parse_ulong_radix(str
, n
, &r
, radix
)) return false; // Could not parse
821 if ((ushort
)r
!= r
) return false; // Out of range
822 if (dest
== NULL
) return true;
823 *(reinterpret_cast<unsigned short*>(dest
)) = r
;
// Parses an int by delegating to parse_long_radix() and then rejecting
// values that do not survive a round trip through int.  A NULL dest
// validates without storing.
// NOTE(review): the remaining parameters, the declaration of r and the
// final return are missing from this listing.
827 bool PCRE::Arg::parse_int_radix(const char* str
,
832 if (!parse_long_radix(str
, n
, &r
, radix
)) return false; // Could not parse
833 if ((int)r
!= r
) return false; // Out of range
834 if (dest
== NULL
) return true;
835 *(reinterpret_cast<int*>(dest
)) = r
;
// Parses an unsigned int by delegating to parse_ulong_radix() and then
// rejecting values that do not survive a round trip through uint.  A NULL
// dest validates without storing.
// NOTE(review): the remaining parameters, the declaration of r and the
// final return are missing from this listing.
839 bool PCRE::Arg::parse_uint_radix(const char* str
,
844 if (!parse_ulong_radix(str
, n
, &r
, radix
)) return false; // Could not parse
845 if ((uint
)r
!= r
) return false; // Out of range
846 if (dest
== NULL
) return true;
847 *(reinterpret_cast<unsigned int*>(dest
)) = r
;
// Parses the n bytes at str as an int64 in the given radix using strtoll().
// Fails for empty input, when strtoll() does not stop exactly at str + n
// (leftover junk), or when errno is set.  A NULL dest validates without
// storing.
// NOTE(review): the remaining parameters, the declaration of "end", the
// presumably present reset of errno, and the final return are missing from
// this listing.
851 bool PCRE::Arg::parse_longlong_radix(const char* str
,
855 if (n
== 0) return false;
856 char buf
[kMaxNumberLength
+1];
857 str
= TerminateNumber(buf
, str
, n
);
860 int64 r
= strtoll(str
, &end
, radix
);
861 if (end
!= str
+ n
) return false; // Leftover junk
862 if (errno
) return false;
863 if (dest
== NULL
) return true;
864 *(reinterpret_cast<int64
*>(dest
)) = r
;
// Parses the n bytes at str as a uint64 in the given radix using
// strtoull().  Fails for empty input, when strtoull() does not stop exactly
// at str + n (leftover junk), or when errno is set.  A NULL dest validates
// without storing.  Per the comment below, negative numbers are treated as
// errors here even though strtoull() would accept them.
// NOTE(review): the remaining parameters, the statement that rejects
// negative input, the declaration of "end", the presumably present reset of
// errno, and the final return are missing from this listing.
868 bool PCRE::Arg::parse_ulonglong_radix(const char* str
,
872 if (n
== 0) return false;
873 char buf
[kMaxNumberLength
+1];
874 str
= TerminateNumber(buf
, str
, n
);
876 // strtoull() will silently accept negative numbers and parse
877 // them. This module is more strict and treats them as errors.
882 uint64 r
= strtoull(str
, &end
, radix
);
883 if (end
!= str
+ n
) return false; // Leftover junk
884 if (errno
) return false;
885 if (dest
== NULL
) return true;
886 *(reinterpret_cast<uint64
*>(dest
)) = r
;
// Parses the n bytes at str as a double using strtod() on a local buffer.
// Fails for empty input and for input of kMaxLength (200) bytes or more,
// which would not fit in buf together with a terminator.  When strtod()
// does not stop exactly at buf + n, the visible code has a fallback that
// recognises "inf"/"infinity"/"nan" (case-insensitively, for Microsoft's
// strtod()), otherwise the input is rejected as leftover junk.  Fails when
// errno is set.  A NULL dest validates without storing.
// NOTE(review): the copy of str into buf, the declaration of "end" and "i",
// the sign handling, any preprocessor guards around the Microsoft-specific
// branch, and the final return are missing from this listing.
890 bool PCRE::Arg::parse_double(const char* str
, int n
, void* dest
) {
891 if (n
== 0) return false;
892 static const int kMaxLength
= 200;
893 char buf
[kMaxLength
];
894 if (n
>= kMaxLength
) return false;
899 double r
= strtod(buf
, &end
);
900 if (end
!= buf
+ n
) {
902 // Microsoft's strtod() doesn't handle inf and nan, so we have to
903 // handle it explicitly. Speed is not important here because this
904 // code is only called in unit tests.
910 } else if ('+' == *i
) {
913 if (0 == stricmp(i
, "inf") || 0 == stricmp(i
, "infinity")) {
914 r
= numeric_limits
<double>::infinity();
917 } else if (0 == stricmp(i
, "nan")) {
918 r
= numeric_limits
<double>::quiet_NaN();
923 return false; // Leftover junk
926 if (errno
) return false;
927 if (dest
== NULL
) return true;
928 *(reinterpret_cast<double*>(dest
)) = r
;
932 bool PCRE::Arg::parse_float(const char* str
, int n
, void* dest
) {
934 if (!parse_double(str
, n
, &r
)) return false;
935 if (dest
== NULL
) return true;
936 *(reinterpret_cast<float*>(dest
)) = static_cast<float>(r
);
941 #define DEFINE_INTEGER_PARSERS(name) \
942 bool PCRE::Arg::parse_##name(const char* str, int n, void* dest) { \
943 return parse_##name##_radix(str, n, dest, 10); \
945 bool PCRE::Arg::parse_##name##_hex(const char* str, int n, void* dest) { \
946 return parse_##name##_radix(str, n, dest, 16); \
948 bool PCRE::Arg::parse_##name##_octal(const char* str, int n, void* dest) { \
949 return parse_##name##_radix(str, n, dest, 8); \
951 bool PCRE::Arg::parse_##name##_cradix(const char* str, int n, void* dest) { \
952 return parse_##name##_radix(str, n, dest, 0); \
955 DEFINE_INTEGER_PARSERS(short);
956 DEFINE_INTEGER_PARSERS(ushort
);
957 DEFINE_INTEGER_PARSERS(int);
958 DEFINE_INTEGER_PARSERS(uint
);
959 DEFINE_INTEGER_PARSERS(long);
960 DEFINE_INTEGER_PARSERS(ulong
);
961 DEFINE_INTEGER_PARSERS(longlong
);
962 DEFINE_INTEGER_PARSERS(ulonglong
);
964 #undef DEFINE_INTEGER_PARSERS