1 // Copyright 2013 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
5 #include "base/strings/safe_sprintf.h"
10 // In debug builds, we use RAW_CHECK() to print useful error messages, if
11 // SafeSPrintf() is called with broken arguments.
12 // As our contract promises that SafeSPrintf() can be called from any
13 // restricted run-time context, it is not actually safe to call logging
14 // functions from it; and we only ever do so for debug builds and hope for the
15 // best. We should _never_ call any logging function other than RAW_CHECK(),
16 // and we should _never_ include any logging code that is active in production
17 // builds. Most notably, we should not include these logging functions in
18 // unofficial release builds, even though those builds would otherwise have
19 // DCHECK()s enabled.
20 // In other words, please do not remove the #ifdef around this #include.
21 // Instead, in production builds we opt for returning a degraded result,
22 // whenever an error is encountered.
23 // E.g. The broken function call
24 // SafeSPrintf("errno = %d (%x)", errno, strerror(errno))
25 // will print something like
26 // errno = 13 (%x)
27 // instead of
28 // errno = 13 (Access denied)
29 // In most of the anticipated use cases, that's probably the preferred
30 // behavior.
31 #include "base/logging.h"
// Checking variant: DEBUG_CHECK() forwards to RAW_CHECK(), the only logging
// facility this file permits itself to use (presumably supplied by the
// base/logging.h include directly above).
32 #define DEBUG_CHECK RAW_CHECK
// Non-checking variant: the condition |x| is still evaluated (so it keeps
// being compiled and any side effects still happen), but nothing is done
// with the result.
// NOTE(review): the #if/#else/#endif directives that select between these two
// definitions are not visible in this excerpt -- confirm against the full file.
34 #define DEBUG_CHECK(x) do { if (x) { } } while (0)
40 // The code in this file is extremely careful to be async-signal-safe.
42 // Most obviously, we avoid calling any code that could dynamically allocate
43 // memory. Doing so would almost certainly result in bugs and dead-locks.
44 // We also avoid calling any other STL functions that could have unintended
45 // side-effects involving memory allocation or access to other shared
46 // resources.
48 // But on top of that, we also avoid calling other library functions, as many
49 // of them have the side-effect of calling getenv() (in order to deal with
50 // localization) or accessing errno. The latter sounds benign, but there are
51 // several execution contexts where it isn't even possible to safely read let
52 // alone write errno.
54 // The stated design goal of the SafeSPrintf() function is that it can be
55 // called from any context that can safely call C or C++ code (i.e. anything
56 // that doesn't require assembly code).
58 // For a brief overview of some but not all of the issues with async-signal-
59 // safety, refer to:
60 // http://pubs.opengroup.org/onlinepubs/009695399/functions/xsh_chap02_04.html
// Largest value an ssize_t can hold, expressed as a size_t. Converting -1
// through ssize_t to size_t yields an all-ones bit pattern; shifting that
// right by one clears the top bit. This relies on size_t and ssize_t having
// the same width; the COMPILE_ASSERT in the Buffer constructor compares the
// result against std::numeric_limits<ssize_t>::max() on the toolchains where
// that assertion is enabled.
63 const size_t kSSizeMaxConst
= ((size_t)(ssize_t
)-1) >> 1;
// Lookup tables that map a digit value (0 through 15) to its ASCII character.
// Buffer::IToASCII() indexes one of them, chosen by its |upcase| argument, to
// emit each digit; the two tables differ only in the case of 'A'-'F'.
65 const char kUpCaseHexDigits
[] = "0123456789ABCDEF";
66 const char kDownCaseHexDigits
[] = "0123456789abcdef";
70 // We would like to define kSSizeMax as std::numeric_limits<ssize_t>::max(),
71 // but C++ doesn't allow us to do that for constants. Instead, we have to
72 // use careful casting and shifting. We later use a COMPILE_ASSERT to
73 // verify that this worked correctly.
// Immutable definition of the size limit; it simply aliases kSSizeMaxConst.
// NOTE(review): this appears to be the NDEBUG branch of the conditional closed
// by the "#else // defined(NDEBUG)" line that follows -- the opening #if is not
// visible in this excerpt, so confirm against the full file.
75 const size_t kSSizeMax
= kSSizeMaxConst
;
77 #else // defined(NDEBUG)
78 // For efficiency, we really need kSSizeMax to be a constant. But for unit
79 // tests, it should be adjustable. This allows us to verify edge cases without
80 // having to fill the entire available address space. As a compromise, we make
81 // kSSizeMax adjustable in debug builds, and then only compile that particular
82 // part of the unit test in debug builds.
// Adjustable definition of the size limit: a non-const, file-local (static)
// variable that starts out equal to kSSizeMaxConst, so that the *ForTest
// functions declared right after it can change and read the limit.
84 static size_t kSSizeMax
= kSSizeMaxConst
;
88 void SetSafeSPrintfSSizeMaxForTest(size_t max
) {
92 size_t GetSafeSPrintfSSizeMaxForTest() {
96 #endif // defined(NDEBUG)
101 // |buffer| is caller-allocated storage that SafeSPrintf() writes to. It
102 // has |size| bytes of writable storage. It is the caller's responsibility
103 // to ensure that the buffer is at least one byte in size, so that it fits
104 // the trailing NUL that will be added by the destructor. The buffer also
105 // must be smaller or equal to kSSizeMax in size.
106 Buffer(char* buffer
, size_t size
)
108 size_(size
- 1), // Account for trailing NUL byte
110 // The following assertion does not build on Mac and Android. This is because
111 // static_assert only works with compile-time constants, but mac uses
112 // libstdc++4.2 and android uses stlport, which both don't mark
113 // numeric_limits::max() as constexpr. Likewise, MSVS2013's standard library
114 // also doesn't mark max() as constexpr yet. cl.exe supports static_assert but
115 // doesn't really implement constexpr yet so it doesn't complain, but clang
116 // does.
117 #if __cplusplus >= 201103 && !defined(OS_ANDROID) && !defined(OS_MACOSX) && \
118 !defined(OS_IOS) && !(defined(__clang__) && defined(OS_WIN))
119 COMPILE_ASSERT(kSSizeMaxConst
== \
120 static_cast<size_t>(std::numeric_limits
<ssize_t
>::max()),
121 kSSizeMax_is_the_max_value_of_an_ssize_t
);
123 DEBUG_CHECK(size
> 0);
124 DEBUG_CHECK(size
<= kSSizeMax
);
128 // The code calling the constructor guaranteed that there was enough space
129 // to store a trailing NUL -- and in debug builds, we are actually
130 // verifying this with DEBUG_CHECK()s in the constructor. So, we can
131 // always unconditionally write the NUL byte in the destructor. We do not
132 // need to adjust the count_, as SafeSPrintf() copies snprintf() in not
133 // including the NUL byte in its return code.
134 *GetInsertionPoint() = '\000';
137 // Returns true, iff the buffer is filled all the way to |kSSizeMax-1|. The
138 // caller can now stop adding more data, as GetCount() has reached its
139 // maximum possible value.
140 inline bool OutOfAddressableSpace() const {
141 return count_
== static_cast<size_t>(kSSizeMax
- 1);
144 // Returns the number of bytes that would have been emitted to |buffer_|
145 // if it was sized sufficiently large. This number can be larger than
146 // |size_|, if the caller provided an insufficiently large output buffer.
147 // But it will never be bigger than |kSSizeMax-1|.
148 inline ssize_t
GetCount() const {
149 DEBUG_CHECK(count_
< kSSizeMax
);
150 return static_cast<ssize_t
>(count_
);
153 // Emits one |ch| character into the |buffer_| and updates the |count_| of
154 // characters that are currently supposed to be in the buffer.
155 // Returns "false", iff the buffer was already full.
156 // N.B. |count_| increases even if no characters have been written. This is
157 // needed so that GetCount() can return the number of bytes that should
158 // have been allocated for the |buffer_|.
159 inline bool Out(char ch
) {
160 if (size_
>= 1 && count_
< size_
) {
161 buffer_
[count_
] = ch
;
162 return IncrementCountByOne();
164 // |count_| still needs to be updated, even if the buffer has been
165 // filled completely. This allows SafeSPrintf() to return the number of
166 // bytes that should have been emitted.
167 IncrementCountByOne();
171 // Inserts |padding|-|len| bytes worth of padding into the |buffer_|.
172 // |count_| will also be incremented by the number of bytes that were meant
173 // to be emitted. The |pad| character is typically either a ' ' space
174 // or a '0' zero, but other non-NUL values are legal.
175 // Returns "false", iff the |buffer_| filled up (i.e. |count_|
176 // overflowed |size_|) at any time during padding.
177 inline bool Pad(char pad
, size_t padding
, size_t len
) {
179 DEBUG_CHECK(padding
>= 0 && padding
<= kSSizeMax
);
180 DEBUG_CHECK(len
>= 0);
181 for (; padding
> len
; --padding
) {
184 IncrementCount(padding
-len
);
192 // POSIX doesn't define any async-signal-safe function for converting
193 // an integer to ASCII. Define our own version.
195 // This also gives us the ability to make the function a little more
196 // powerful and have it deal with |padding|, with truncation, and with
197 // predicting the length of the untruncated output.
199 // IToASCII() converts an integer |i| to ASCII.
201 // Unlike similar functions in the standard C library, it never appends a
202 // NUL character. This is left for the caller to do.
204 // While the function signature takes a signed int64_t, the code decides at
205 // run-time whether to treat the argument as signed (int64_t) or as unsigned
206 // (uint64_t) based on the value of |sign|.
208 // It supports |base|s 2 through 16. Only a |base| of 10 is allowed to have
209 // a |sign|. Otherwise, |i| is treated as unsigned.
211 // For bases larger than 10, |upcase| decides whether lower-case or upper-
212 // case letters should be used to designate digits greater than 9.
214 // Padding can be done with either '0' zeros or ' ' spaces. Padding has to
215 // be positive and will always be applied to the left of the output.
217 // Prepends a |prefix| to the number (e.g. "0x"). This prefix goes to
218 // the left of |padding|, if |pad| is '0'; and to the right of |padding|
219 // if |pad| is ' '.
221 // Returns "false", if the |buffer_| overflowed at any time.
222 bool IToASCII(bool sign
, bool upcase
, int64_t i
, int base
,
223 char pad
, size_t padding
, const char* prefix
);
226 // Increments |count_| by |inc| unless this would cause |count_| to
227 // overflow |kSSizeMax-1|. Returns "false", iff an overflow was detected;
228 // it then clamps |count_| to |kSSizeMax-1|.
229 inline bool IncrementCount(size_t inc
) {
230 // "inc" is either 1 or a "padding" value. Padding is clamped at
231 // run-time to at most kSSizeMax-1. So, we know that "inc" is always in
232 // the range 1..kSSizeMax-1.
233 // This allows us to compute "kSSizeMax - 1 - inc" without incurring any
234 // integer overflows.
235 DEBUG_CHECK(inc
<= kSSizeMax
- 1);
236 if (count_
> kSSizeMax
- 1 - inc
) {
237 count_
= kSSizeMax
- 1;
245 // Convenience method for the common case of incrementing |count_| by one.
246 inline bool IncrementCountByOne() {
247 return IncrementCount(1);
250 // Return the current insertion point into the buffer. This is typically
251 // at |buffer_| + |count_|, but could be before that if truncation
252 // happened. It always points to one byte past the last byte that was
253 // successfully placed into the |buffer_|.
254 inline char* GetInsertionPoint() const {
259 return buffer_
+ idx
;
262 // User-provided buffer that will receive the fully formatted output string.
265 // Number of bytes that are available in the buffer excluding the trailing
266 // NUL byte that will be added by the destructor.
269 // Number of bytes that would have been emitted to the buffer, if the buffer
270 // was sufficiently big. This number always excludes the trailing NUL byte
271 // and it is guaranteed to never grow bigger than kSSizeMax-1.
274 DISALLOW_COPY_AND_ASSIGN(Buffer
);
278 bool Buffer::IToASCII(bool sign
, bool upcase
, int64_t i
, int base
,
279 char pad
, size_t padding
, const char* prefix
) {
280 // Sanity check for parameters. None of these should ever fail, but see
281 // above for the rationale why we can't call CHECK().
282 DEBUG_CHECK(base
>= 2);
283 DEBUG_CHECK(base
<= 16);
284 DEBUG_CHECK(!sign
|| base
== 10);
285 DEBUG_CHECK(pad
== '0' || pad
== ' ');
286 DEBUG_CHECK(padding
>= 0);
287 DEBUG_CHECK(padding
<= kSSizeMax
);
288 DEBUG_CHECK(!(sign
&& prefix
&& *prefix
));
290 // Handle negative numbers, if the caller indicated that |i| should be
291 // treated as a signed number; otherwise treat |i| as unsigned (even if the
292 // most significant bit is set).
293 // Details are tricky, because of limited data-types, but equivalent pseudo-
294 // code would look like:
295 // if (sign && i < 0)
303 // Turn our number positive.
304 if (i
== std::numeric_limits
<int64_t>::min()) {
305 // The most negative integer needs special treatment.
307 num
= static_cast<uint64_t>(-(i
+ 1));
309 // "Normal" negative numbers are easy.
310 num
= static_cast<uint64_t>(-i
);
313 num
= static_cast<uint64_t>(i
);
316 // If padding with '0' zero, emit the prefix or '-' character now. Otherwise,
317 // make the prefix accessible in reverse order, so that we can later output
318 // it right between padding and the number.
319 // We cannot choose the easier approach of just reversing the number, as that
320 // fails in situations where we need to truncate numbers that have padding
321 // and/or prefixes.
322 const char* reverse_prefix
= NULL
;
323 if (prefix
&& *prefix
) {
333 for (reverse_prefix
= prefix
; *reverse_prefix
; ++reverse_prefix
) {
338 const size_t prefix_length
= reverse_prefix
- prefix
;
340 // Loop until we have converted the entire number. Output at least one
341 // character (i.e. '0').
342 size_t start
= count_
;
343 size_t discarded
= 0;
344 bool started
= false;
346 // Make sure there is still enough space left in our output buffer.
347 if (count_
>= size_
) {
349 // It is rare that we need to output a partial number. But if asked
350 // to do so, we will still make sure we output the correct number of
351 // leading digits.
352 // Since we are generating the digits in reverse order, we actually
353 // have to discard digits in the order that we have already emitted
354 // them. This is essentially equivalent to:
355 // memmove(buffer_ + start, buffer_ + start + 1, size_ - start - 1)
356 for (char* move
= buffer_
+ start
, *end
= buffer_
+ size_
- 1;
363 } else if (count_
- size_
> 1) {
364 // Need to increment either |count_| or |discarded| to make progress.
365 // The latter is more efficient, as it eventually triggers fast
366 // handling of padding. But we have to ensure we don't accidentally
367 // change the overall state (i.e. switch the state-machine from
368 // discarding to non-discarding). |count_| needs to always stay
369 // bigger than |size_|.
375 // Output the next digit and (if necessary) compensate for the most
376 // negative integer needing special treatment. This works because,
377 // no matter the bit width of the integer, the lowest-most decimal
378 // integer always ends in 2, 4, 6, or 8.
379 if (!num
&& started
) {
380 if (reverse_prefix
> prefix
) {
381 Out(*--reverse_prefix
);
387 Out((upcase
? kUpCaseHexDigits
: kDownCaseHexDigits
)[num
%base
+ minint
]);
393 // Add padding, if requested.
397 // Performance optimization for when we are asked to output excessive
398 // padding, but our output buffer is limited in size. Even if we output
399 // a 64bit number in binary, we would never write more than 64 plus
400 // prefix non-padding characters. So, once this limit has been passed,
401 // any further state change can be computed arithmetically; we know that
402 // by this time, our entire final output consists of padding characters
403 // that have all already been output.
404 if (discarded
> 8*sizeof(num
) + prefix_length
) {
405 IncrementCount(padding
);
409 } while (num
|| padding
|| (reverse_prefix
> prefix
));
411 // Conversion to ASCII actually resulted in the digits being in reverse
412 // order. We can't easily generate them in forward order, as we can't tell
413 // the number of characters needed until we are done converting.
414 // So, now, we reverse the string (except for the possible '-' sign).
415 char* front
= buffer_
+ start
;
416 char* back
= GetInsertionPoint();
417 while (--back
> front
) {
423 IncrementCount(discarded
);
427 } // anonymous namespace
431 ssize_t
SafeSNPrintf(char* buf
, size_t sz
, const char* fmt
, const Arg
* args
,
432 const size_t max_args
) {
433 // Make sure that at least one NUL byte can be written, and that the buffer
434 // never overflows kSSizeMax. Not only does that use up most or all of the
435 // address space, it also would result in a return code that cannot be
436 // represented.
437 if (static_cast<ssize_t
>(sz
) < 1) {
439 } else if (sz
> kSSizeMax
) {
443 // Iterate over format string and interpret '%' arguments as they are
444 // encountered.
445 Buffer
buffer(buf
, sz
);
448 for (unsigned int cur_arg
= 0; *fmt
&& !buffer
.OutOfAddressableSpace(); ) {
453 format_character_found
:
455 case '0': case '1': case '2': case '3': case '4':
456 case '5': case '6': case '7': case '8': case '9':
457 // Found a width parameter. Convert to an integer value and store in
458 // "padding". If the leading digit is a zero, change the padding
459 // character from a space ' ' to a zero '0'.
460 pad
= ch
== '0' ? '0' : ' ';
462 // The maximum allowed padding fills all the available address
463 // space and leaves just enough space to insert the trailing NUL.
464 const size_t max_padding
= kSSizeMax
- 1;
465 if (padding
> max_padding
/10 ||
466 10*padding
> max_padding
- (ch
- '0')) {
467 DEBUG_CHECK(padding
<= max_padding
/10 &&
468 10*padding
<= max_padding
- (ch
- '0'));
469 // Integer overflow detected. Skip the rest of the width until
470 // we find the format character, then do the normal error handling.
472 padding
= max_padding
;
473 while ((ch
= *fmt
++) >= '0' && ch
<= '9') {
475 if (cur_arg
< max_args
) {
480 padding
= 10*padding
+ ch
- '0';
481 if (padding
> max_padding
) {
482 // This doesn't happen for "sane" values of kSSizeMax. But once
483 // kSSizeMax gets smaller than about 10, our earlier range checks
484 // are incomplete. Unittests do trigger this artificial corner
485 // case.
486 DEBUG_CHECK(padding
<= max_padding
);
487 goto padding_overflow
;
490 if (ch
< '0' || ch
> '9') {
491 // Reached the end of the width parameter. This is where the format
492 // character is found.
493 goto format_character_found
;
497 case 'c': { // Output an ASCII character.
498 // Check that there are arguments left to be inserted.
499 if (cur_arg
>= max_args
) {
500 DEBUG_CHECK(cur_arg
< max_args
);
504 // Check that the argument has the expected type.
505 const Arg
& arg
= args
[cur_arg
++];
506 if (arg
.type
!= Arg::INT
&& arg
.type
!= Arg::UINT
) {
507 DEBUG_CHECK(arg
.type
== Arg::INT
|| arg
.type
== Arg::UINT
);
511 // Apply padding, if needed.
512 buffer
.Pad(' ', padding
, 1);
514 // Convert the argument to an ASCII character and output it.
515 char ch
= static_cast<char>(arg
.integer
.i
);
517 goto end_of_output_buffer
;
521 case 'd': // Output a possibly signed decimal value.
522 case 'o': // Output an unsigned octal value.
523 case 'x': // Output an unsigned hexadecimal value.
525 case 'p': { // Output a pointer value.
526 // Check that there are arguments left to be inserted.
527 if (cur_arg
>= max_args
) {
528 DEBUG_CHECK(cur_arg
< max_args
);
532 const Arg
& arg
= args
[cur_arg
++];
534 const char* prefix
= NULL
;
536 // Check that the argument has the expected type.
537 if (arg
.type
!= Arg::INT
&& arg
.type
!= Arg::UINT
) {
538 DEBUG_CHECK(arg
.type
== Arg::INT
|| arg
.type
== Arg::UINT
);
544 // The Arg() constructor automatically performed sign expansion on
545 // signed parameters. This is great when outputting a %d decimal
546 // number, but can result in unexpected leading 0xFF bytes when
547 // outputting a %x hexadecimal number. Mask bits, if necessary.
548 // We have to do this here, instead of in the Arg() constructor, as
549 // the Arg() constructor cannot tell whether we will output a %d
550 // or a %x. Only the latter should experience masking.
551 if (arg
.integer
.width
< sizeof(int64_t)) {
552 i
&= (1LL << (8*arg
.integer
.width
)) - 1;
556 // Pointer values require an actual pointer or a string.
557 if (arg
.type
== Arg::POINTER
) {
558 i
= reinterpret_cast<uintptr_t>(arg
.ptr
);
559 } else if (arg
.type
== Arg::STRING
) {
560 i
= reinterpret_cast<uintptr_t>(arg
.str
);
561 } else if (arg
.type
== Arg::INT
&&
562 arg
.integer
.width
== sizeof(NULL
) &&
563 arg
.integer
.i
== 0) { // Allow C++'s version of NULL
566 DEBUG_CHECK(arg
.type
== Arg::POINTER
|| arg
.type
== Arg::STRING
);
570 // Pointers always include the "0x" prefix.
574 // Use IToASCII() to convert to ASCII representation. For decimal
575 // numbers, optionally print a sign. For hexadecimal numbers,
576 // distinguish between upper and lower case. %p addresses are always
577 // printed as upcase. Supports base 8, 10, and 16. Prints padding
578 // and/or prefixes, if so requested.
579 buffer
.IToASCII(ch
== 'd' && arg
.type
== Arg::INT
,
581 ch
== 'o' ? 8 : ch
== 'd' ? 10 : 16,
582 pad
, padding
, prefix
);
585 // Check that there are arguments left to be inserted.
586 if (cur_arg
>= max_args
) {
587 DEBUG_CHECK(cur_arg
< max_args
);
591 // Check that the argument has the expected type.
592 const Arg
& arg
= args
[cur_arg
++];
594 if (arg
.type
== Arg::STRING
) {
595 s
= arg
.str
? arg
.str
: "<NULL>";
596 } else if (arg
.type
== Arg::INT
&& arg
.integer
.width
== sizeof(NULL
) &&
597 arg
.integer
.i
== 0) { // Allow C++'s version of NULL
600 DEBUG_CHECK(arg
.type
== Arg::STRING
);
604 // Apply padding, if needed. This requires us to first check the
605 // length of the string that we are outputting.
608 for (const char* src
= s
; *src
++; ) {
611 buffer
.Pad(' ', padding
, len
);
614 // Printing a string involves nothing more than copying it into the
615 // output buffer and making sure we don't output more bytes than
616 // available space; Out() takes care of doing that.
617 for (const char* src
= s
; *src
; ) {
622 // Quoted percent '%' character.
625 // C++ gives us tools to do type checking -- something that snprintf()
626 // could never really do. So, whenever we see arguments that don't
627 // match up with the format string, we refuse to output them. But
628 // since we have to be extremely conservative about being async-
629 // signal-safe, we are limited in the type of error handling that we
630 // can do in production builds (in debug builds we can use
631 // DEBUG_CHECK() and hope for the best). So, all we do is pass the
632 // format string unchanged. That should eventually get the user's
633 // attention; and in the meantime, it hopefully doesn't lose too much
634 // data.
636 // Unknown or unsupported format character. Just copy verbatim to
637 // output.
641 goto end_of_format_string
;
651 end_of_format_string
:
652 end_of_output_buffer
:
653 return buffer
.GetCount();
656 } // namespace internal
658 ssize_t
SafeSNPrintf(char* buf
, size_t sz
, const char* fmt
) {
659 // Make sure that at least one NUL byte can be written, and that the buffer
660 // never overflows kSSizeMax. Not only does that use up most or all of the
661 // address space, it also would result in a return code that cannot be
662 // represented.
663 if (static_cast<ssize_t
>(sz
) < 1) {
665 } else if (sz
> kSSizeMax
) {
669 Buffer
buffer(buf
, sz
);
671 // In the slow-path, we deal with errors by copying the contents of
672 // "fmt" unexpanded. This means, if there are no arguments passed, the
673 // SafeSPrintf() function always degenerates to a version of strncpy() that
674 // de-duplicates '%' characters.
675 const char* src
= fmt
;
676 for (; *src
; ++src
) {
678 DEBUG_CHECK(src
[0] != '%' || src
[1] == '%');
679 if (src
[0] == '%' && src
[1] == '%') {
683 return buffer
.GetCount();
686 } // namespace strings