/* Copyright (c) 2001, Matej Pfajfar.
 * Copyright (c) 2001-2004, Roger Dingledine.
 * Copyright (c) 2004-2006, Roger Dingledine, Nick Mathewson.
 * Copyright (c) 2007-2021, The Tor Project, Inc. */
/* See LICENSE for licensing information */

/**
 * \file
 *
 * \brief Miscellaneous functions for encoding and decoding various things
 *   in base16, base32, and base64.
 **/

#include "lib/encoding/binascii.h"
#include "lib/log/log.h"
#include "lib/log/util_bug.h"
#include "lib/cc/torint.h"
#include "lib/string/compat_ctype.h"
#include "lib/intmath/muldiv.h"
#include "lib/malloc/malloc.h"

#include <stddef.h>
#include <string.h>

/** Return a pointer to a NUL-terminated hexadecimal string encoding
 * the first <b>fromlen</b> bytes of <b>from</b>.  At most 32 bytes are
 * encoded: a larger <b>fromlen</b> is clamped so that the output fits in
 * the internal buffer.
 *
 * The result lives in a static buffer, so it does not need to be
 * deallocated, but every call to hex_str overwrites the previous result.
 */
const char *
hex_str(const char *from, size_t fromlen)
{
  static char buf[65];
  /* Two hex digits per input byte, plus one byte for the NUL. */
  const size_t max_bytes = (sizeof(buf)-1)/2;

  if (fromlen > max_bytes)
    fromlen = max_bytes;
  base16_encode(buf, sizeof(buf), from, fromlen);
  return buf;
}

43 /* Return the base32 encoded size in bytes using the source length srclen.
45 * (WATCH OUT: This API counts the terminating NUL byte, but
46 * base64_encode_size does not.)
49 base32_encoded_size(size_t srclen
)
52 tor_assert(srclen
< SIZE_T_CEILING
/ 8);
53 enclen
= BASE32_NOPAD_BUFSIZE(srclen
);
54 tor_assert(enclen
< INT_MAX
&& enclen
> srclen
);
58 /** Implements base32 encoding as in RFC 4648. */
60 base32_encode(char *dest
, size_t destlen
, const char *src
, size_t srclen
)
63 size_t nbits
= srclen
* 8;
66 /* We need enough space for the encoded data and the extra NUL byte. */
67 tor_assert(base32_encoded_size(srclen
) <= destlen
);
68 tor_assert(destlen
< SIZE_T_CEILING
);
70 /* Make sure we leave no uninitialized data in the destination buffer. */
71 memset(dest
, 0, destlen
);
73 for (i
=0,bit
=0; bit
< nbits
; ++i
, bit
+=5) {
74 /* set v to the 16-bit value starting at src[bits/8], 0-padded. */
76 v
= ((uint8_t)src
[idx
]) << 8;
78 v
+= (uint8_t)src
[idx
+1];
79 /* set u to the 5-bit value at the bit'th bit of buf. */
80 u
= (v
>> (11-(bit
%8))) & 0x1F;
81 dest
[i
] = BASE32_CHARS
[u
];
86 /** Implements base32 decoding as in RFC 4648.
87 * Return the number of bytes decoded if successful; -1 otherwise.
90 base32_decode(char *dest
, size_t destlen
, const char *src
, size_t srclen
)
92 /* XXXX we might want to rewrite this along the lines of base64_decode, if
93 * it ever shows up in the profile. */
97 nbits
= ((srclen
* 5) / 8) * 8;
99 tor_assert(srclen
< SIZE_T_CEILING
/ 5);
100 tor_assert((nbits
/8) <= destlen
); /* We need enough space. */
101 tor_assert(destlen
< SIZE_T_CEILING
);
103 /* Make sure we leave no uninitialized data in the destination buffer. */
104 memset(dest
, 0, destlen
);
106 /* Convert base32 encoded chars to the 5-bit values that they represent. */
107 tmp
= tor_malloc_zero(srclen
);
108 for (j
= 0; j
< srclen
; ++j
) {
109 if (src
[j
] > 0x60 && src
[j
] < 0x7B) tmp
[j
] = src
[j
] - 0x61;
110 else if (src
[j
] > 0x31 && src
[j
] < 0x38) tmp
[j
] = src
[j
] - 0x18;
111 else if (src
[j
] > 0x40 && src
[j
] < 0x5B) tmp
[j
] = src
[j
] - 0x41;
113 log_warn(LD_GENERAL
, "illegal character in base32 encoded string");
119 /* Assemble result byte-wise by applying five possible cases. */
120 for (i
= 0, bit
= 0; bit
< nbits
; ++i
, bit
+= 8) {
123 dest
[i
] = (((uint8_t)tmp
[(bit
/5)]) << 3) +
124 (((uint8_t)tmp
[(bit
/5)+1]) >> 2);
127 dest
[i
] = (((uint8_t)tmp
[(bit
/5)]) << 6) +
128 (((uint8_t)tmp
[(bit
/5)+1]) << 1) +
129 (((uint8_t)tmp
[(bit
/5)+2]) >> 4);
132 dest
[i
] = (((uint8_t)tmp
[(bit
/5)]) << 4) +
133 (((uint8_t)tmp
[(bit
/5)+1]) >> 1);
136 dest
[i
] = (((uint8_t)tmp
[(bit
/5)]) << 7) +
137 (((uint8_t)tmp
[(bit
/5)+1]) << 2) +
138 (((uint8_t)tmp
[(bit
/5)+2]) >> 3);
141 dest
[i
] = (((uint8_t)tmp
[(bit
/5)]) << 5) +
142 ((uint8_t)tmp
[(bit
/5)+1]);
147 memset(tmp
, 0, srclen
); /* on the heap, this should be safe */
153 #define BASE64_OPENSSL_LINELEN 64
155 /** Return the Base64 encoded size of <b>srclen</b> bytes of data in
158 * (WATCH OUT: This API <em>does not</em> count the terminating NUL byte,
159 * but base32_encoded_size does.)
161 * If <b>flags</b>&BASE64_ENCODE_MULTILINE is true, return the size
162 * of the encoded output as multiline output (64 character, `\n' terminated
166 base64_encode_size(size_t srclen
, int flags
)
170 /* Use INT_MAX for overflow checking because base64_encode() returns int. */
171 tor_assert(srclen
< INT_MAX
);
172 tor_assert(CEIL_DIV(srclen
, 3) < INT_MAX
/ 4);
174 enclen
= BASE64_LEN(srclen
);
175 if (flags
& BASE64_ENCODE_MULTILINE
)
176 enclen
+= CEIL_DIV(enclen
, BASE64_OPENSSL_LINELEN
);
178 tor_assert(enclen
< INT_MAX
&& (enclen
== 0 || enclen
> srclen
));
/** Return an upper bound on the number of bytes that might be needed to hold
 * the data from decoding a base64 string of length <b>srclen</b>.  This is
 * only an upper bound, since some part of the base64 string might be padding
 * or space.
 */
size_t
base64_decode_maxsize(size_t srclen)
{
  /* Checked so that srclen * 3 below stays well under INT_MAX. */
  tor_assert(srclen < INT_MAX / 3);

  return CEIL_DIV(srclen * 3, 4);
}

/** Internal table mapping 6 bit values to the Base64 alphabet. */
static const char base64_encode_table[64] = {
  'A', 'B', 'C', 'D', 'E', 'F', 'G', 'H',
  'I', 'J', 'K', 'L', 'M', 'N', 'O', 'P',
  'Q', 'R', 'S', 'T', 'U', 'V', 'W', 'X',
  'Y', 'Z', 'a', 'b', 'c', 'd', 'e', 'f',
  'g', 'h', 'i', 'j', 'k', 'l', 'm', 'n',
  'o', 'p', 'q', 'r', 's', 't', 'u', 'v',
  'w', 'x', 'y', 'z', '0', '1', '2', '3',
  '4', '5', '6', '7', '8', '9', '+', '/'
};

206 /** Base64 encode <b>srclen</b> bytes of data from <b>src</b>. Write
207 * the result into <b>dest</b>, if it will fit within <b>destlen</b>
208 * bytes. Return the number of bytes written on success; -1 if
209 * destlen is too short, or other failure.
211 * If <b>flags</b>&BASE64_ENCODE_MULTILINE is true, return encoded
212 * output in multiline format (64 character, `\n' terminated lines).
215 base64_encode(char *dest
, size_t destlen
, const char *src
, size_t srclen
,
218 const unsigned char *usrc
= (unsigned char *)src
;
219 const unsigned char *eous
= usrc
+ srclen
;
229 /* Ensure that there is sufficient space, including the NUL. */
230 enclen
= base64_encode_size(srclen
, flags
);
231 if (destlen
< enclen
+ 1)
233 if (destlen
> SIZE_T_CEILING
)
235 if (enclen
> INT_MAX
)
238 /* Make sure we leave no uninitialized data in the destination buffer. */
239 memset(dest
, 0, destlen
);
241 /* XXX/Yawning: If this ends up being too slow, this can be sped up
242 * by separating the multiline format case and the normal case, and
243 * processing 48 bytes of input at a time when newlines are desired.
245 #define ENCODE_CHAR(ch) \
248 if (flags & BASE64_ENCODE_MULTILINE) { \
249 if (++linelen % BASE64_OPENSSL_LINELEN == 0) { \
256 #define ENCODE_N(idx) \
257 ENCODE_CHAR(base64_encode_table[(n >> ((3 - idx) * 6)) & 0x3f])
259 #define ENCODE_PAD() ENCODE_CHAR('=')
261 /* Iterate over all the bytes in src. Each one will add 8 bits to the
262 * value we're encoding. Accumulate bits in <b>n</b>, and whenever we
263 * have 24 bits, batch them into 4 bytes and flush those bytes to dest.
265 for ( ; usrc
< eous
; ++usrc
) {
266 n
= (n
<< 8) | *usrc
;
267 if ((++n_idx
) == 3) {
278 /* 0 leftover bits, no padding to add. */
281 /* 8 leftover bits, pad to 12 bits, write the 2 6-bit values followed
282 * by 2 padding characters.
291 /* 16 leftover bits, pad to 18 bits, write the 3 6-bit values followed
292 * by 1 padding character.
300 // LCOV_EXCL_START -- we can't reach this point, because we enforce
301 // 0 <= ncov_idx < 3 in the loop above.
303 /* Something went catastrophically wrong. */
304 tor_fragile_assert();
313 /* Multiline output always includes at least one newline. */
314 if (flags
& BASE64_ENCODE_MULTILINE
&& linelen
!= 0)
317 tor_assert(d
- dest
== (ptrdiff_t)enclen
);
319 *d
++ = '\0'; /* NUL terminate the output. */
/** As base64_encode, but do not add any internal spaces, and remove external
 * padding from the output stream.
 *
 * The data is first encoded with padding in place, so <b>dest</b> must be
 * large enough for base64_encode(): base64_encode_size(srclen, 0) bytes
 * plus one for the terminating NUL.
 *
 * Return the length of the stripped output, not counting the NUL, or the
 * non-positive result of base64_encode() if nothing was encoded. */
int
base64_encode_nopad(char *dest, size_t destlen,
                    const uint8_t *src, size_t srclen)
{
  int n = base64_encode(dest, destlen, (const char*) src, srclen, 0);
  char *in, *out;

  if (n <= 0)
    return n;
  tor_assert((size_t)n < destlen && dest[n] == 0);

  /* Compact the string in place, dropping every '=' and '\n'. */
  in = out = dest;
  while (*in) {
    if (*in == '=' || *in == '\n') {
      ++in;
    } else {
      *out++ = *in++;
    }
  }
  *out = 0;

  tor_assert(out - dest <= INT_MAX);

  return (int)(out - dest);
}

#undef BASE64_OPENSSL_LINELEN

/** Special values used for the base64_decode_table */
#define X 255
#define SP 64
#define PAD 65
/** Internal table mapping byte values to what they represent in base64.
 * Numbers 0..63 are 6-bit integers.  SPs are spaces, and should be
 * skipped.  Xs are invalid and must not appear in base64. PAD indicates
 * end-of-string. */
static const uint8_t base64_decode_table[256] = {
  X, X, X, X, X, X, X, X, X, SP, SP, SP, X, SP, X, X, /* */
  X, X, X, X, X, X, X, X, X, X, X, X, X, X, X, X,
  SP, X, X, X, X, X, X, X, X, X, X, 62, X, X, X, 63,
  52, 53, 54, 55, 56, 57, 58, 59, 60, 61, X, X, X, PAD, X, X,
  X, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14,
  15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, X, X, X, X, X,
  X, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40,
  41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, X, X, X, X, X,
  X, X, X, X, X, X, X, X, X, X, X, X, X, X, X, X,
  X, X, X, X, X, X, X, X, X, X, X, X, X, X, X, X,
  X, X, X, X, X, X, X, X, X, X, X, X, X, X, X, X,
  X, X, X, X, X, X, X, X, X, X, X, X, X, X, X, X,
  X, X, X, X, X, X, X, X, X, X, X, X, X, X, X, X,
  X, X, X, X, X, X, X, X, X, X, X, X, X, X, X, X,
  X, X, X, X, X, X, X, X, X, X, X, X, X, X, X, X,
  X, X, X, X, X, X, X, X, X, X, X, X, X, X, X, X,
};

/** Base64 decode <b>srclen</b> bytes of data from <b>src</b>.  Write
 * the result into <b>dest</b>, if it will fit within <b>destlen</b>
 * bytes.  Return the number of bytes written on success; -1 if
 * destlen is too short, or other failure.
 *
 * NOTE 1: The space remaining in <b>dest</b> is checked each time decoded
 * bytes are about to be written; whitespace in the input is skipped and
 * does not consume output space.
 *
 * NOTE 2: This implementation does not check for the correct number of
 * padding "=" characters at the end of the string, and does not check
 * for internal padding characters: decoding simply stops at the first "=".
 */
int
base64_decode(char *dest, size_t destlen, const char *src, size_t srclen)
{
  const char *eos = src+srclen;
  uint32_t n=0;
  int n_idx=0;
  size_t di = 0;

  /* The byte count is returned as an int, so it has to fit in one. */
  if (destlen > INT_MAX)
    return -1;

  /* Make sure we leave no uninitialized data in the destination buffer. */
  memset(dest, 0, destlen);

  /* Iterate over all the bytes in src.  Each one will add 0 or 6 bits to the
   * value we're decoding.  Accumulate bits in <b>n</b>, and whenever we have
   * 24 bits, batch them into 3 bytes and flush those bytes to dest.
   */
  for ( ; src < eos; ++src) {
    unsigned char c = (unsigned char) *src;
    uint8_t v = base64_decode_table[c];
    switch (v) {
      case X:
        /* This character isn't allowed in base64. */
        return -1;
      case SP:
        /* This character is whitespace, and has no effect. */
        continue;
      case PAD:
        /* We've hit an = character: the data is over. */
        goto end_of_loop;
      default:
        /* We have an actual 6-bit value.  Append it to the bits in n. */
        n = (n<<6) | v;
        if ((++n_idx) == 4) {
          /* We've accumulated 24 bits in n. Flush them. */
          if (destlen < 3 || di > destlen - 3)
            return -1;
          dest[di++] = (n>>16);
          dest[di++] = (n>>8) & 0xff;
          dest[di++] = (n) & 0xff;
          n_idx = 0;
          n = 0;
        }
    }
  }
 end_of_loop:
  /* If we have leftover bits, we need to cope. */
  switch (n_idx) {
    case 0:
    default:
      /* No leftover bits.  We win. */
      break;
    case 1:
      /* 6 leftover bits. That's invalid; we can't form a byte out of that. */
      return -1;
    case 2:
      /* 12 leftover bits: The last 4 are padding and the first 8 are data. */
      if (destlen < 1 || di > destlen - 1)
        return -1;
      dest[di++] = n >> 4;
      break;
    case 3:
      /* 18 leftover bits: The last 2 are padding and the first 16 are data. */
      if (destlen < 2 || di > destlen - 2)
        return -1;
      dest[di++] = n >> 10;
      dest[di++] = n >> 2;
      break;
  }

  tor_assert(di <= destlen);

  return (int)di;
}

#undef X
#undef SP
#undef PAD

473 /** Encode the <b>srclen</b> bytes at <b>src</b> in a NUL-terminated,
474 * uppercase hexadecimal string; store it in the <b>destlen</b>-byte buffer
478 base16_encode(char *dest
, size_t destlen
, const char *src
, size_t srclen
)
483 tor_assert(srclen
< SIZE_T_CEILING
/ 2 - 1);
484 tor_assert(destlen
>= BASE16_BUFSIZE(srclen
));
485 tor_assert(destlen
< SIZE_T_CEILING
);
487 /* Make sure we leave no uninitialized data in the destination buffer. */
488 memset(dest
, 0, destlen
);
493 *cp
++ = "0123456789ABCDEF"[ (*(const uint8_t*)src
) >> 4 ];
494 *cp
++ = "0123456789ABCDEF"[ (*(const uint8_t*)src
) & 0xf ];
/** Given a hexadecimal string of <b>srclen</b> bytes in <b>src</b>, decode
 * it and store the result in the <b>destlen</b>-byte buffer at <b>dest</b>.
 * Return the number of bytes decoded on success, -1 on failure.  If
 * <b>srclen</b> is odd, if <b>destlen</b> is greater than INT_MAX or less
 * than half of <b>srclen</b>, or if <b>src</b> contains a character that
 * hex_decode_digit() rejects, -1 is returned. */
int
base16_decode(char *dest, size_t destlen, const char *src, size_t srclen)
{
  const char *end;
  char *dest_orig = dest;
  int v1,v2;

  /* Every output byte needs exactly two input digits. */
  if ((srclen % 2) != 0)
    return -1;
  if (destlen < srclen/2 || destlen > INT_MAX)
    return -1;

  /* Make sure we leave no uninitialized data in the destination buffer. */
  memset(dest, 0, destlen);

  end = src+srclen;
  while (src<end) {
    v1 = hex_decode_digit(*src);
    v2 = hex_decode_digit(*(src+1));
    if (v1<0||v2<0)
      return -1;
    *(uint8_t*)dest = (v1<<4)|v2;
    ++dest;
    src+=2;
  }

  tor_assert((dest-dest_orig) <= (ptrdiff_t) destlen);

  return (int) (dest-dest_orig);
}