cksum: use AVX2 and AVX512 for speedup
[coreutils.git] / src / basenc.c
blobf9b364d3aae94d86413445248cd59ccd364e393e
1 /* Base64, base32, and similar encoding/decoding strings or files.
2 Copyright (C) 2004-2024 Free Software Foundation, Inc.
4 This program is free software: you can redistribute it and/or modify
5 it under the terms of the GNU General Public License as published by
6 the Free Software Foundation, either version 3 of the License, or
7 (at your option) any later version.
9 This program is distributed in the hope that it will be useful,
10 but WITHOUT ANY WARRANTY; without even the implied warranty of
11 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 GNU General Public License for more details.
14 You should have received a copy of the GNU General Public License
15 along with this program. If not, see <https://www.gnu.org/licenses/>. */
17 /* Written by Simon Josefsson <simon@josefsson.org>. */
19 #include <config.h>
21 #include <stdio.h>
22 #include <getopt.h>
23 #include <sys/types.h>
25 #include "system.h"
26 #include "assure.h"
27 #include "c-ctype.h"
28 #include "fadvise.h"
29 #include "quote.h"
30 #include "xstrtol.h"
31 #include "xdectoint.h"
32 #include "xbinary-io.h"
/* Author list handed to case_GETOPT_VERSION_CHAR in main.  The basenc
   build (BASE_TYPE 42) credits a second author.  */
#if BASE_TYPE != 42
# define AUTHORS proper_name ("Simon Josefsson")
#else
# define AUTHORS \
  proper_name ("Simon Josefsson"), \
  proper_name ("Assaf Gordon")
#endif
42 #if BASE_TYPE == 32
43 # include "base32.h"
44 # define PROGRAM_NAME "base32"
45 #elif BASE_TYPE == 64
46 # include "base64.h"
47 # define PROGRAM_NAME "base64"
48 #elif BASE_TYPE == 42
49 # include "base32.h"
50 # include "base64.h"
51 # include "assure.h"
52 # define PROGRAM_NAME "basenc"
53 #else
54 # error missing/invalid BASE_TYPE definition
55 #endif
59 #if BASE_TYPE == 42
/* Values returned by getopt_long for the long-only encoding options.
   Numbering starts just above CHAR_MAX, out of the range of the
   single-character options 'd', 'i' and 'w'.  */
enum
{
  BASE64_OPTION = CHAR_MAX + 1,   /* --base64 */
  BASE64URL_OPTION,               /* --base64url */
  BASE32_OPTION,                  /* --base32 */
  BASE32HEX_OPTION,               /* --base32hex */
  BASE16_OPTION,                  /* --base16 */
  BASE2MSBF_OPTION,               /* --base2msbf */
  BASE2LSBF_OPTION,               /* --base2lsbf */
  Z85_OPTION                      /* --z85 */
};
71 #endif
73 static struct option const long_options[] =
75 {"decode", no_argument, 0, 'd'},
76 {"wrap", required_argument, 0, 'w'},
77 {"ignore-garbage", no_argument, 0, 'i'},
78 #if BASE_TYPE == 42
79 {"base64", no_argument, 0, BASE64_OPTION},
80 {"base64url", no_argument, 0, BASE64URL_OPTION},
81 {"base32", no_argument, 0, BASE32_OPTION},
82 {"base32hex", no_argument, 0, BASE32HEX_OPTION},
83 {"base16", no_argument, 0, BASE16_OPTION},
84 {"base2msbf", no_argument, 0, BASE2MSBF_OPTION},
85 {"base2lsbf", no_argument, 0, BASE2LSBF_OPTION},
86 {"z85", no_argument, 0, Z85_OPTION},
87 #endif
88 {GETOPT_HELP_OPTION_DECL},
89 {GETOPT_VERSION_OPTION_DECL},
90 {nullptr, 0, nullptr, 0}
/* Print usage information and exit with STATUS.
   For a STATUS other than EXIT_SUCCESS only emit_try_help () is called;
   for EXIT_SUCCESS the full option summary is written to stdout.
   The option list differs between the basenc build (BASE_TYPE == 42)
   and the base32/base64 builds.  */
93 void
94 usage (int status)
96 if (status != EXIT_SUCCESS)
97 emit_try_help ();
98 else
100 printf (_("\
101 Usage: %s [OPTION]... [FILE]\n\
102 "), program_name);
/* One-line description: fixed wording for basenc, otherwise BASE_TYPE
   (32 or 64) is substituted into the text.  */
104 #if BASE_TYPE == 42
105 fputs (_("\
106 basenc encode or decode FILE, or standard input, to standard output.\n\
107 "), stdout);
108 #else
109 printf (_("\
110 Base%d encode or decode FILE, or standard input, to standard output.\n\
111 "), BASE_TYPE);
112 #endif
114 emit_stdin_note ();
115 emit_mandatory_arg_note ();
/* Encoding-selection options exist only in the basenc build.  */
116 #if BASE_TYPE == 42
117 fputs (_("\
118 --base64 same as 'base64' program (RFC4648 section 4)\n\
119 "), stdout);
120 fputs (_("\
121 --base64url file- and url-safe base64 (RFC4648 section 5)\n\
122 "), stdout);
123 fputs (_("\
124 --base32 same as 'base32' program (RFC4648 section 6)\n\
125 "), stdout);
126 fputs (_("\
127 --base32hex extended hex alphabet base32 (RFC4648 section 7)\n\
128 "), stdout);
129 fputs (_("\
130 --base16 hex encoding (RFC4648 section 8)\n\
131 "), stdout);
132 fputs (_("\
133 --base2msbf bit string with most significant bit (msb) first\n\
134 "), stdout);
135 fputs (_("\
136 --base2lsbf bit string with least significant bit (lsb) first\n\
137 "), stdout);
138 #endif
/* Options common to all three programs.  */
139 fputs (_("\
140 -d, --decode decode data\n\
141 -i, --ignore-garbage when decoding, ignore non-alphabet characters\n\
142 -w, --wrap=COLS wrap encoded lines after COLS character (default 76).\n\
143 Use 0 to disable line wrapping\n\
144 "), stdout);
145 #if BASE_TYPE == 42
146 fputs (_("\
147 --z85 ascii85-like encoding (ZeroMQ spec:32/Z85);\n\
148 when encoding, input length must be a multiple of 4;\n\
149 when decoding, input length must be a multiple of 5\n\
150 "), stdout);
151 #endif
152 fputs (HELP_OPTION_DESCRIPTION, stdout);
153 fputs (VERSION_OPTION_DESCRIPTION, stdout);
/* Closing notes; the base32/base64 variant names the alphabet via
   PROGRAM_NAME.  */
154 #if BASE_TYPE == 42
155 fputs (_("\
157 When decoding, the input may contain newlines in addition to the bytes of\n\
158 the formal alphabet. Use --ignore-garbage to attempt to recover\n\
159 from any other non-alphabet bytes in the encoded stream.\n\
160 "), stdout);
161 #else
162 printf (_("\
164 The data are encoded as described for the %s alphabet in RFC 4648.\n\
165 When decoding, the input may contain newlines in addition to the bytes of\n\
166 the formal %s alphabet. Use --ignore-garbage to attempt to recover\n\
167 from any other non-alphabet bytes in the encoded stream.\n"),
168 PROGRAM_NAME, PROGRAM_NAME);
169 #endif
170 emit_ancillary_info (PROGRAM_NAME);
/* Terminate the process with the caller-supplied status.  */
173 exit (status);
176 #if BASE_TYPE != 64
/* Return how many more characters are needed to bring LEN up to the
   next multiple of 8 (the base32 group size), or 0 if LEN is already
   a multiple of 8.  */
static int
base32_required_padding (int len)
{
  int partial = len % 8;
  if (partial == 0)
    return 0;
  return 8 - partial;
}
183 #endif
185 #if BASE_TYPE != 32
/* Return how many more characters are needed to bring LEN up to the
   next multiple of 4 (the base64 group size), or 0 if LEN is already
   a multiple of 4.  */
static int
base64_required_padding (int len)
{
  int partial = len % 4;
  if (partial == 0)
    return 0;
  return 4 - partial;
}
192 #endif
194 #if BASE_TYPE == 42
/* Padding hook for encodings that never pad: always returns 0,
   whatever LEN is.  */
static int
no_required_padding (int len)
{
  (void) len;
  return 0;
}
200 #endif
202 #define ENC_BLOCKSIZE (1024 * 3 * 10)
204 #if BASE_TYPE == 32
205 # define BASE_LENGTH BASE32_LENGTH
206 # define REQUIRED_PADDING base32_required_padding
207 /* Note that increasing this may decrease performance if --ignore-garbage
208 is used, because of the memmove operation below. */
209 # define DEC_BLOCKSIZE (1024 * 5)
211 /* Ensure that BLOCKSIZE is a multiple of 5 and 8. */
212 static_assert (ENC_BLOCKSIZE % 40 == 0); /* Padding chars only on last block. */
213 static_assert (DEC_BLOCKSIZE % 40 == 0); /* Complete encoded blocks are used. */
215 # define base_encode base32_encode
216 # define base_decode_context base32_decode_context
217 # define base_decode_ctx_init base32_decode_ctx_init
218 # define base_decode_ctx base32_decode_ctx
219 # define isubase isubase32
220 #elif BASE_TYPE == 64
221 # define BASE_LENGTH BASE64_LENGTH
222 # define REQUIRED_PADDING base64_required_padding
223 /* Note that increasing this may decrease performance if --ignore-garbage
224 is used, because of the memmove operation below. */
225 # define DEC_BLOCKSIZE (1024 * 3)
227 /* Ensure that BLOCKSIZE is a multiple of 3 and 4. */
228 static_assert (ENC_BLOCKSIZE % 12 == 0); /* Padding chars only on last block. */
229 static_assert (DEC_BLOCKSIZE % 12 == 0); /* Complete encoded blocks are used. */
231 # define base_encode base64_encode
232 # define base_decode_context base64_decode_context
233 # define base_decode_ctx_init base64_decode_ctx_init
234 # define base_decode_ctx base64_decode_ctx
235 # define isubase isubase64
236 #elif BASE_TYPE == 42
239 # define BASE_LENGTH base_length
240 # define REQUIRED_PADDING required_padding
242 /* Note that increasing this may decrease performance if --ignore-garbage
243 is used, because of the memmove operation below. */
244 # define DEC_BLOCKSIZE (4200)
245 static_assert (DEC_BLOCKSIZE % 40 == 0); /* complete encoded blocks for base32*/
246 static_assert (DEC_BLOCKSIZE % 12 == 0); /* complete encoded blocks for base64*/
248 static int (*base_length) (int i);
249 static int (*required_padding) (int i);
250 static bool (*isubase) (unsigned char ch);
251 static void (*base_encode) (char const *restrict in, idx_t inlen,
252 char *restrict out, idx_t outlen);
/* State kept by the base16 decoder between calls.  */
struct base16_decode_context
{
  /* Either a 4-bit nibble, or negative if we have no nibble.  */
  signed char nibble;
};
/* State kept by the Z85 decoder between calls.  */
struct z85_decode_context
{
  /* Number of entries of OCTETS currently in use.  */
  int i;
  /* Decoded digit values of the 5-character group being collected.  */
  unsigned char octets[5];
};
/* State kept by the base2 decoders between calls.  */
struct base2_decode_context
{
  /* Bits of the octet that is being assembled.  */
  unsigned char octet;
};
271 struct base_decode_context
273 int i; /* will be updated manually */
274 union {
275 struct base64_decode_context base64;
276 struct base32_decode_context base32;
277 struct base16_decode_context base16;
278 struct base2_decode_context base2;
279 struct z85_decode_context z85;
280 } ctx;
281 char *inbuf;
282 idx_t bufsize;
284 static void (*base_decode_ctx_init) (struct base_decode_context *ctx);
285 static bool (*base_decode_ctx) (struct base_decode_context *ctx,
286 char const *restrict in, idx_t inlen,
287 char *restrict out, idx_t *outlen);
288 #endif
293 #if BASE_TYPE == 42
/* Function form of BASE64_LENGTH, so that it can be stored in the
   base_length pointer.  */
static int
base64_length_wrapper (int len)
{
  int encoded_length = BASE64_LENGTH (len);
  return encoded_length;
}
301 static void
302 base64_decode_ctx_init_wrapper (struct base_decode_context *ctx)
304 base64_decode_ctx_init (&ctx->ctx.base64);
307 static bool
308 base64_decode_ctx_wrapper (struct base_decode_context *ctx,
309 char const *restrict in, idx_t inlen,
310 char *restrict out, idx_t *outlen)
312 bool b = base64_decode_ctx (&ctx->ctx.base64, in, inlen, out, outlen);
313 ctx->i = ctx->ctx.base64.i;
314 return b;
317 static void
318 init_inbuf (struct base_decode_context *ctx)
320 ctx->bufsize = DEC_BLOCKSIZE;
321 ctx->inbuf = xcharalloc (ctx->bufsize);
324 static void
325 prepare_inbuf (struct base_decode_context *ctx, idx_t inlen)
327 if (ctx->bufsize < inlen)
328 ctx->inbuf = xpalloc (ctx->inbuf, &ctx->bufsize,
329 inlen - ctx->bufsize, -1, sizeof *ctx->inbuf);
333 static void
334 base64url_encode (char const *restrict in, idx_t inlen,
335 char *restrict out, idx_t outlen)
337 base64_encode (in, inlen, out, outlen);
338 /* translate 62nd and 63rd characters */
339 char *p = out;
340 while (outlen--)
342 if (*p == '+')
343 *p = '-';
344 else if (*p == '/')
345 *p = '_';
346 ++p;
/* Return true if CH is in the base64url alphabet: '-' and '_' are
   accepted, '+' and '/' are not, and every other byte is judged by
   isubase64.  */
static bool
isubase64url (unsigned char ch)
{
  switch (ch)
    {
    case '-':
    case '_':
      return true;

    case '+':
    case '/':
      return false;

    default:
      return isubase64 (ch);
    }
}
357 static void
358 base64url_decode_ctx_init_wrapper (struct base_decode_context *ctx)
360 base64_decode_ctx_init (&ctx->ctx.base64);
361 init_inbuf (ctx);
365 static bool
366 base64url_decode_ctx_wrapper (struct base_decode_context *ctx,
367 char const *restrict in, idx_t inlen,
368 char *restrict out, idx_t *outlen)
370 prepare_inbuf (ctx, inlen);
371 memcpy (ctx->inbuf, in, inlen);
373 /* translate 62nd and 63rd characters */
374 idx_t i = inlen;
375 char *p = ctx->inbuf;
376 while (i--)
378 if (*p == '+' || *p == '/')
380 *outlen = 0;
381 return false; /* reject base64 input */
383 else if (*p == '-')
384 *p = '+';
385 else if (*p == '_')
386 *p = '/';
387 ++p;
390 bool b = base64_decode_ctx (&ctx->ctx.base64, ctx->inbuf, inlen,
391 out, outlen);
392 ctx->i = ctx->ctx.base64.i;
394 return b;
/* Function form of BASE32_LENGTH, so that it can be stored in the
   base_length pointer.  */
static int
base32_length_wrapper (int len)
{
  int encoded_length = BASE32_LENGTH (len);
  return encoded_length;
}
405 static void
406 base32_decode_ctx_init_wrapper (struct base_decode_context *ctx)
408 base32_decode_ctx_init (&ctx->ctx.base32);
411 static bool
412 base32_decode_ctx_wrapper (struct base_decode_context *ctx,
413 char const *restrict in, idx_t inlen,
414 char *restrict out, idx_t *outlen)
416 bool b = base32_decode_ctx (&ctx->ctx.base32, in, inlen, out, outlen);
417 ctx->i = ctx->ctx.base32.i;
418 return b;
/* Map a character of the standard base32 alphabet to the character
   with the same value in the base32hex alphabet:

     ABCDEFGHIJKLMNOPQRSTUVWXYZ234567
     0123456789ABCDEFGHIJKLMNOPQRSTUV

   The table is indexed by (character - 0x32) and covers the codes
   0x32 ('2') through 0x5a ('Z'); the codes 0x38..0x40 between the two
   runs map to themselves.  */
static const char base32_norm_to_hex[32 + 9] = {
  /* 0x32..0x37: '2'..'7' */
  'Q', 'R', 'S', 'T', 'U', 'V',

  /* 0x38..0x40: unchanged */
  0x38, 0x39, 0x3a, 0x3b, 0x3c, 0x3d, 0x3e, 0x3f, 0x40,

  /* 0x41..0x48: 'A'..'H' */
  '0', '1', '2', '3', '4', '5', '6', '7',

  /* 0x49..0x50: 'I'..'P' */
  '8', '9', 'A', 'B', 'C', 'D', 'E', 'F',

  /* 0x51..0x58: 'Q'..'X' */
  'G', 'H', 'I', 'J', 'K', 'L', 'M', 'N',

  /* 0x59..0x5a: 'Y'..'Z' */
  'O', 'P',
};
/* Map a character of the base32hex alphabet to the character with
   the same value in the standard base32 alphabet:

     0123456789ABCDEFGHIJKLMNOPQRSTUV
     ABCDEFGHIJKLMNOPQRSTUVWXYZ234567

   The table is indexed by (character - 0x30); the codes 0x3a..0x40
   between '9' and 'A' map to themselves.  */
static const char base32_hex_to_norm[32 + 9] = {
  /* 0x30..0x39: '0'..'9' */
  'A', 'B', 'C', 'D', 'E', 'F', 'G', 'H', 'I', 'J',

  /* 0x3a..0x40: unchanged */
  0x3a, 0x3b, 0x3c, 0x3d, 0x3e, 0x3f, 0x40,

  /* 0x41..0x4a: 'A'..'J' */
  'K', 'L', 'M', 'N', 'O', 'P', 'Q', 'R', 'S', 'T',

  /* 0x4b..0x54: 'K'..'T' */
  'U', 'V', 'W', 'X', 'Y', 'Z', '2', '3', '4', '5',

  /* 0x55..0x56: 'U'..'V' */
  '6', '7'
};
/* Return true if CH is in the base32hex alphabet, i.e. one of
   '0'..'9' or 'A'..'V'.  */
inline static bool
isubase32hex (unsigned char ch)
{
  if ('0' <= ch && ch <= '9')
    return true;
  return 'A' <= ch && ch <= 'V';
}
470 static void
471 base32hex_encode (char const *restrict in, idx_t inlen,
472 char *restrict out, idx_t outlen)
474 base32_encode (in, inlen, out, outlen);
476 for (char *p = out; outlen--; p++)
478 affirm (0x32 <= *p && *p <= 0x5a); /* LCOV_EXCL_LINE */
479 *p = base32_norm_to_hex[*p - 0x32];
484 static void
485 base32hex_decode_ctx_init_wrapper (struct base_decode_context *ctx)
487 base32_decode_ctx_init (&ctx->ctx.base32);
488 init_inbuf (ctx);
492 static bool
493 base32hex_decode_ctx_wrapper (struct base_decode_context *ctx,
494 char const *restrict in, idx_t inlen,
495 char *restrict out, idx_t *outlen)
497 prepare_inbuf (ctx, inlen);
499 idx_t i = inlen;
500 char *p = ctx->inbuf;
501 while (i--)
503 if (isubase32hex (*in))
504 *p = base32_hex_to_norm[*in - 0x30];
505 else
506 *p = *in;
507 ++p;
508 ++in;
511 bool b = base32_decode_ctx (&ctx->ctx.base32, ctx->inbuf, inlen,
512 out, outlen);
513 ctx->i = ctx->ctx.base32.i;
515 return b;
/* With this approach this file works independent of the charset used
   (think EBCDIC).  However, it does assume that the characters in the
   base16 alphabet (0-9, A-F, a-f) are encoded in 0..255.  POSIX
   1003.1-2001 require that char and unsigned char are 8-bit
   quantities, though, taking care of that problem.  But this may be a
   potential problem on non-POSIX C99 platforms.

   IBM C V6 for AIX mishandles "#define B32(x) ...'x'...", so use "_"
   as the formal parameter rather than "x".

   B16 (N) evaluates to the value of the hexadecimal digit whose
   character code is N, or to -1 if N is not the code of such a digit.  */
# define B16(_)                                 \
  ((_) == '0' ? 0                               \
   : (_) == '1' ? 1                             \
   : (_) == '2' ? 2                             \
   : (_) == '3' ? 3                             \
   : (_) == '4' ? 4                             \
   : (_) == '5' ? 5                             \
   : (_) == '6' ? 6                             \
   : (_) == '7' ? 7                             \
   : (_) == '8' ? 8                             \
   : (_) == '9' ? 9                             \
   : (_) == 'A' || (_) == 'a' ? 10              \
   : (_) == 'B' || (_) == 'b' ? 11              \
   : (_) == 'C' || (_) == 'c' ? 12              \
   : (_) == 'D' || (_) == 'd' ? 13              \
   : (_) == 'E' || (_) == 'e' ? 14              \
   : (_) == 'F' || (_) == 'f' ? 15              \
   : -1)

/* Digit value for every possible byte: entry N is B16 (N).  Every
   slot must be initialised from its own index; slots 64 and 164 used
   to be written as B16 (32) and B16 (132).  */
static signed char const base16_to_int[256] = {
  B16 (0), B16 (1), B16 (2), B16 (3), B16 (4), B16 (5), B16 (6), B16 (7),
  B16 (8), B16 (9), B16 (10), B16 (11), B16 (12), B16 (13), B16 (14), B16 (15),
  B16 (16), B16 (17), B16 (18), B16 (19), B16 (20), B16 (21), B16 (22), B16 (23),
  B16 (24), B16 (25), B16 (26), B16 (27), B16 (28), B16 (29), B16 (30), B16 (31),
  B16 (32), B16 (33), B16 (34), B16 (35), B16 (36), B16 (37), B16 (38), B16 (39),
  B16 (40), B16 (41), B16 (42), B16 (43), B16 (44), B16 (45), B16 (46), B16 (47),
  B16 (48), B16 (49), B16 (50), B16 (51), B16 (52), B16 (53), B16 (54), B16 (55),
  B16 (56), B16 (57), B16 (58), B16 (59), B16 (60), B16 (61), B16 (62), B16 (63),
  B16 (64), B16 (65), B16 (66), B16 (67), B16 (68), B16 (69), B16 (70), B16 (71),
  B16 (72), B16 (73), B16 (74), B16 (75), B16 (76), B16 (77), B16 (78), B16 (79),
  B16 (80), B16 (81), B16 (82), B16 (83), B16 (84), B16 (85), B16 (86), B16 (87),
  B16 (88), B16 (89), B16 (90), B16 (91), B16 (92), B16 (93), B16 (94), B16 (95),
  B16 (96), B16 (97), B16 (98), B16 (99), B16 (100), B16 (101), B16 (102), B16 (103),
  B16 (104), B16 (105), B16 (106), B16 (107), B16 (108), B16 (109), B16 (110), B16 (111),
  B16 (112), B16 (113), B16 (114), B16 (115), B16 (116), B16 (117), B16 (118), B16 (119),
  B16 (120), B16 (121), B16 (122), B16 (123), B16 (124), B16 (125), B16 (126), B16 (127),
  B16 (128), B16 (129), B16 (130), B16 (131), B16 (132), B16 (133), B16 (134), B16 (135),
  B16 (136), B16 (137), B16 (138), B16 (139), B16 (140), B16 (141), B16 (142), B16 (143),
  B16 (144), B16 (145), B16 (146), B16 (147), B16 (148), B16 (149), B16 (150), B16 (151),
  B16 (152), B16 (153), B16 (154), B16 (155), B16 (156), B16 (157), B16 (158), B16 (159),
  B16 (160), B16 (161), B16 (162), B16 (163), B16 (164), B16 (165), B16 (166), B16 (167),
  B16 (168), B16 (169), B16 (170), B16 (171), B16 (172), B16 (173), B16 (174), B16 (175),
  B16 (176), B16 (177), B16 (178), B16 (179), B16 (180), B16 (181), B16 (182), B16 (183),
  B16 (184), B16 (185), B16 (186), B16 (187), B16 (188), B16 (189), B16 (190), B16 (191),
  B16 (192), B16 (193), B16 (194), B16 (195), B16 (196), B16 (197), B16 (198), B16 (199),
  B16 (200), B16 (201), B16 (202), B16 (203), B16 (204), B16 (205), B16 (206), B16 (207),
  B16 (208), B16 (209), B16 (210), B16 (211), B16 (212), B16 (213), B16 (214), B16 (215),
  B16 (216), B16 (217), B16 (218), B16 (219), B16 (220), B16 (221), B16 (222), B16 (223),
  B16 (224), B16 (225), B16 (226), B16 (227), B16 (228), B16 (229), B16 (230), B16 (231),
  B16 (232), B16 (233), B16 (234), B16 (235), B16 (236), B16 (237), B16 (238), B16 (239),
  B16 (240), B16 (241), B16 (242), B16 (243), B16 (244), B16 (245), B16 (246), B16 (247),
  B16 (248), B16 (249), B16 (250), B16 (251), B16 (252), B16 (253), B16 (254), B16 (255)
};
612 static bool
613 isubase16 (unsigned char ch)
615 return ch < sizeof base16_to_int && 0 <= base16_to_int[ch];
/* Return the base16-encoded size of LEN bytes: two digits per byte.  */
static int
base16_length (int len)
{
  return 2 * len;
}
625 static void
626 base16_encode (char const *restrict in, idx_t inlen,
627 char *restrict out, idx_t outlen)
629 static const char base16[16] = "0123456789ABCDEF";
631 while (inlen && outlen)
633 unsigned char c = *in;
634 *out++ = base16[c >> 4];
635 *out++ = base16[c & 0x0F];
636 ++in;
637 inlen--;
638 outlen -= 2;
643 static void
644 base16_decode_ctx_init (struct base_decode_context *ctx)
646 init_inbuf (ctx);
647 ctx->ctx.base16.nibble = -1;
648 ctx->i = 1;
652 static bool
653 base16_decode_ctx (struct base_decode_context *ctx,
654 char const *restrict in, idx_t inlen,
655 char *restrict out, idx_t *outlen)
657 bool ignore_lines = true; /* for now, always ignore them */
658 char *out0 = out;
659 signed char nibble = ctx->ctx.base16.nibble;
661 /* inlen==0 is request to flush output.
662 if there is a dangling high nibble - we are missing the low nibble,
663 so return false - indicating an invalid input. */
664 if (inlen == 0)
666 *outlen = 0;
667 return nibble < 0;
670 while (inlen--)
672 unsigned char c = *in++;
673 if (ignore_lines && c == '\n')
674 continue;
676 if (sizeof base16_to_int <= c || base16_to_int[c] < 0)
678 *outlen = out - out0;
679 return false; /* garbage - return false */
682 if (nibble < 0)
683 nibble = base16_to_int[c];
684 else
686 /* have both nibbles, write octet */
687 *out++ = (nibble << 4) + base16_to_int[c];
688 nibble = -1;
692 ctx->ctx.base16.nibble = nibble;
693 *outlen = out - out0;
694 return true;
/* Return the Z85-encoded size of LEN bytes: 5 characters for every
   4 bytes.  */
static int
z85_length (int len)
{
  /* Z85 does not allow padding, so no need to round to highest integer.  */
  return (len * 5) / 4;
}
708 static bool
709 isuz85 (unsigned char ch)
711 return c_isalnum (ch) || strchr (".-:+=^!/*?&<>()[]{}@%$#", ch) != nullptr;
/* The Z85 alphabet: entry N is the character used for digit value N.
   The array holds exactly the 85 characters, without a terminating
   NUL.  */
static char const z85_encoding[85] =
  "0123456789"                  /*  0 .. 9  */
  "abcdefghijklmnopqrstuvwxyz"  /* 10 .. 35 */
  "ABCDEFGHIJKLMNOPQRSTUVWXYZ"  /* 36 .. 61 */
  ".-:+=^!/*?&<>()[]{}@%$#";    /* 62 .. 84 */
720 static void
721 z85_encode (char const *restrict in, idx_t inlen,
722 char *restrict out, idx_t outlen)
724 int i = 0;
725 unsigned char quad[4];
726 idx_t outidx = 0;
728 while (true)
730 if (inlen == 0)
732 /* no more input, exactly on 4 octet boundary. */
733 if (i == 0)
734 return;
736 /* currently, there's no way to return an error in encoding. */
737 error (EXIT_FAILURE, 0,
738 _("invalid input (length must be multiple of 4 characters)"));
740 else
742 quad[i++] = *in++;
743 --inlen;
746 /* Got a quad, encode it */
747 if (i == 4)
749 int_fast64_t val = quad[0];
750 val = (val << 24) + (quad[1] << 16) + (quad[2] << 8) + quad[3];
752 for (int j = 4; j >= 0; --j)
754 int c = val % 85;
755 val /= 85;
757 /* NOTE: if there is padding (which is trimmed by z85
758 before outputting the result), the output buffer 'out'
759 might not include enough allocated bytes for the padding,
760 so don't store them. */
761 if (outidx + j < outlen)
762 out[j] = z85_encoding[c];
764 out += 5;
765 outidx += 5;
766 i = 0;
771 static void
772 z85_decode_ctx_init (struct base_decode_context *ctx)
774 init_inbuf (ctx);
775 ctx->ctx.z85.i = 0;
776 ctx->i = 1;
780 # define Z85_LO_CTX_TO_32BIT_VAL(ctx) \
781 (((ctx)->ctx.z85.octets[1] * 85 * 85 * 85) + \
782 ((ctx)->ctx.z85.octets[2] * 85 * 85) + \
783 ((ctx)->ctx.z85.octets[3] * 85) + \
784 ((ctx)->ctx.z85.octets[4]))
787 # define Z85_HI_CTX_TO_32BIT_VAL(ctx) \
788 ((int_fast64_t) (ctx)->ctx.z85.octets[0] * 85 * 85 * 85 * 85 )
/*
  The Z85 digit values and their characters:

   0 -  9:  0 1 2 3 4 5 6 7 8 9
  10 - 19:  a b c d e f g h i j
  20 - 29:  k l m n o p q r s t
  30 - 39:  u v w x y z A B C D
  40 - 49:  E F G H I J K L M N
  50 - 59:  O P Q R S T U V W X
  60 - 69:  Y Z . - : + = ^ ! /   #dummy comment to workaround syntax-check
  70 - 79:  * ? & < > ( ) [ ] {
  80 - 84:  } @ % $ #

  The table below is the inverse mapping.  It is indexed by
  (character code - 33), covering the codes 33 ('!') through 125
  ('}'); -1 marks a character that is not in the alphabet.
*/
static signed char const z85_decoding[93] = {
  68, -1, 84, 83, 82, 72, -1,               /* ! " # $ % & ' */
  75, 76, 70, 65, -1, 63, 62, 69,           /* ( ) * + , - . / */
  0,  1,  2,  3,  4,  5,  6,  7,  8,  9,    /* '0' to '9' */
  64, -1, 73, 66, 74, 71, 81,               /* : ; < = > ? @ */
  36, 37, 38, 39, 40, 41, 42, 43, 44, 45,   /* 'A' to 'J' */
  46, 47, 48, 49, 50, 51, 52, 53, 54, 55,   /* 'K' to 'T' */
  56, 57, 58, 59, 60, 61,                   /* 'U' to 'Z' */
  77, -1, 78, 67, -1, -1,                   /* [ \ ] ^ _ ` */
  10, 11, 12, 13, 14, 15, 16, 17, 18, 19,   /* 'a' to 'j' */
  20, 21, 22, 23, 24, 25, 26, 27, 28, 29,   /* 'k' to 't' */
  30, 31, 32, 33, 34, 35,                   /* 'u' to 'z' */
  79, -1, 80                                /* { | } */
};
816 static bool
817 z85_decode_ctx (struct base_decode_context *ctx,
818 char const *restrict in, idx_t inlen,
819 char *restrict out, idx_t *outlen)
821 bool ignore_lines = true; /* for now, always ignore them */
823 *outlen = 0;
825 /* inlen==0 is request to flush output.
826 if there are dangling values - we are missing entries,
827 so return false - indicating an invalid input. */
828 if (inlen == 0)
830 if (ctx->ctx.z85.i > 0)
832 /* Z85 variant does not allow padding - input must
833 be a multiple of 5 - so return error. */
834 return false;
836 return true;
839 while (inlen--)
841 if (ignore_lines && *in == '\n')
843 ++in;
844 continue;
847 /* z85 decoding */
848 unsigned char c = *in;
850 if (c >= 33 && c <= 125)
852 signed char ch = z85_decoding[c - 33];
853 if (ch < 0)
854 return false; /* garbage - return false */
855 c = ch;
857 else
858 return false; /* garbage - return false */
860 ++in;
862 ctx->ctx.z85.octets[ctx->ctx.z85.i++] = c;
863 if (ctx->ctx.z85.i == 5)
865 /* decode the lowest 4 octets, then check for overflows. */
866 int_fast64_t val = Z85_LO_CTX_TO_32BIT_VAL (ctx);
868 /* The Z85 spec and the reference implementation say nothing
869 about overflows. To be on the safe side, reject them. */
871 val += Z85_HI_CTX_TO_32BIT_VAL (ctx);
872 if ((val >> 24) & ~0xFF)
873 return false;
875 *out++ = val >> 24;
876 *out++ = (val >> 16) & 0xFF;
877 *out++ = (val >> 8) & 0xFF;
878 *out++ = val & 0xFF;
880 *outlen += 4;
882 ctx->ctx.z85.i = 0;
885 ctx->i = ctx->ctx.z85.i;
886 return true;
/* Return true if CH is a binary digit, '0' or '1'.  */
inline static bool
isubase2 (unsigned char ch)
{
  switch (ch)
    {
    case '0':
    case '1':
      return true;

    default:
      return false;
    }
}
/* Return the base2-encoded size of LEN bytes: eight digits per byte.  */
static int
base2_length (int len)
{
  return 8 * len;
}
903 inline static void
904 base2msbf_encode (char const *restrict in, idx_t inlen,
905 char *restrict out, idx_t outlen)
907 while (inlen && outlen)
909 unsigned char c = *in;
910 for (int i = 0; i < 8; i++)
912 *out++ = c & 0x80 ? '1' : '0';
913 c <<= 1;
915 inlen--;
916 outlen -= 8;
917 ++in;
921 inline static void
922 base2lsbf_encode (char const *restrict in, idx_t inlen,
923 char *restrict out, idx_t outlen)
925 while (inlen && outlen)
927 unsigned char c = *in;
928 for (int i = 0; i < 8; i++)
930 *out++ = c & 0x01 ? '1' : '0';
931 c >>= 1;
933 inlen--;
934 outlen -= 8;
935 ++in;
940 static void
941 base2_decode_ctx_init (struct base_decode_context *ctx)
943 init_inbuf (ctx);
944 ctx->ctx.base2.octet = 0;
945 ctx->i = 0;
949 static bool
950 base2lsbf_decode_ctx (struct base_decode_context *ctx,
951 char const *restrict in, idx_t inlen,
952 char *restrict out, idx_t *outlen)
954 bool ignore_lines = true; /* for now, always ignore them */
956 *outlen = 0;
958 /* inlen==0 is request to flush output.
959 if there is a dangling bit - we are missing some bits,
960 so return false - indicating an invalid input. */
961 if (inlen == 0)
962 return ctx->i == 0;
964 while (inlen--)
966 if (ignore_lines && *in == '\n')
968 ++in;
969 continue;
972 if (!isubase2 (*in))
973 return false;
975 bool bit = (*in == '1');
976 ctx->ctx.base2.octet |= bit << ctx->i;
977 ++ctx->i;
979 if (ctx->i == 8)
981 *out++ = ctx->ctx.base2.octet;
982 ctx->ctx.base2.octet = 0;
983 ++*outlen;
984 ctx->i = 0;
987 ++in;
990 return true;
993 static bool
994 base2msbf_decode_ctx (struct base_decode_context *ctx,
995 char const *restrict in, idx_t inlen,
996 char *restrict out, idx_t *outlen)
998 bool ignore_lines = true; /* for now, always ignore them */
1000 *outlen = 0;
1002 /* inlen==0 is request to flush output.
1003 if there is a dangling bit - we are missing some bits,
1004 so return false - indicating an invalid input. */
1005 if (inlen == 0)
1006 return ctx->i == 0;
1008 while (inlen--)
1010 if (ignore_lines && *in == '\n')
1012 ++in;
1013 continue;
1016 if (!isubase2 (*in))
1017 return false;
1019 bool bit = (*in == '1');
1020 if (ctx->i == 0)
1021 ctx->i = 8;
1022 --ctx->i;
1023 ctx->ctx.base2.octet |= bit << ctx->i;
1025 if (ctx->i == 0)
1027 *out++ = ctx->ctx.base2.octet;
1028 ctx->ctx.base2.octet = 0;
1029 ++*outlen;
1030 ctx->i = 0;
1033 ++in;
1036 return true;
1039 #endif /* BASE_TYPE == 42, i.e., "basenc"*/
1043 static void
1044 wrap_write (char const *buffer, idx_t len,
1045 idx_t wrap_column, idx_t *current_column, FILE *out)
1047 if (wrap_column == 0)
1049 /* Simple write. */
1050 if (fwrite (buffer, 1, len, stdout) < len)
1051 write_error ();
1053 else
1054 for (idx_t written = 0; written < len; )
1056 idx_t to_write = MIN (wrap_column - *current_column, len - written);
1058 if (to_write == 0)
1060 if (fputc ('\n', out) == EOF)
1061 write_error ();
1062 *current_column = 0;
1064 else
1066 if (fwrite (buffer + written, 1, to_write, stdout) < to_write)
1067 write_error ();
1068 *current_column += to_write;
1069 written += to_write;
/* Close IN and exit.  If closing fails, exit with EXIT_FAILURE after
   a diagnostic that names INFILE, or standard input when INFILE is
   "-".  Otherwise exit with EXIT_SUCCESS.  */
static _Noreturn void
finish_and_exit (FILE *in, char const *infile)
{
  bool close_failed = fclose (in) != 0;

  if (close_failed)
    {
      if (!STREQ (infile, "-"))
        error (EXIT_FAILURE, errno, "%s", quotef (infile));
      else
        error (EXIT_FAILURE, errno, _("closing standard input"));
    }

  exit (EXIT_SUCCESS);
}
1088 static _Noreturn void
1089 do_encode (FILE *in, char const *infile, FILE *out, idx_t wrap_column)
1091 idx_t current_column = 0;
1092 char *inbuf, *outbuf;
1093 idx_t sum;
1095 inbuf = xmalloc (ENC_BLOCKSIZE);
1096 outbuf = xmalloc (BASE_LENGTH (ENC_BLOCKSIZE));
1100 idx_t n;
1102 sum = 0;
1105 n = fread (inbuf + sum, 1, ENC_BLOCKSIZE - sum, in);
1106 sum += n;
1108 while (!feof (in) && !ferror (in) && sum < ENC_BLOCKSIZE);
1110 if (sum > 0)
1112 /* Process input one block at a time. Note that ENC_BLOCKSIZE
1113 is sized so that no pad chars will appear in output. */
1114 base_encode (inbuf, sum, outbuf, BASE_LENGTH (sum));
1116 wrap_write (outbuf, BASE_LENGTH (sum), wrap_column,
1117 &current_column, out);
1120 while (!feof (in) && !ferror (in) && sum == ENC_BLOCKSIZE);
1122 /* When wrapping, terminate last line. */
1123 if (wrap_column && current_column > 0 && fputc ('\n', out) == EOF)
1124 write_error ();
1126 if (ferror (in))
1127 error (EXIT_FAILURE, errno, _("read error"));
1129 finish_and_exit (in, infile);
1132 static _Noreturn void
1133 do_decode (FILE *in, char const *infile, FILE *out, bool ignore_garbage)
1135 char *inbuf, *outbuf;
1136 idx_t sum;
1137 struct base_decode_context ctx;
1139 char padbuf[8] = "========";
1140 inbuf = xmalloc (BASE_LENGTH (DEC_BLOCKSIZE));
1141 outbuf = xmalloc (DEC_BLOCKSIZE);
1143 #if BASE_TYPE == 42
1144 ctx.inbuf = nullptr;
1145 #endif
1146 base_decode_ctx_init (&ctx);
1150 bool ok;
1152 sum = 0;
1155 idx_t n = fread (inbuf + sum,
1156 1, BASE_LENGTH (DEC_BLOCKSIZE) - sum, in);
1158 if (ignore_garbage)
1160 for (idx_t i = 0; n > 0 && i < n;)
1162 if (isubase (inbuf[sum + i]) || inbuf[sum + i] == '=')
1163 i++;
1164 else
1165 memmove (inbuf + sum + i, inbuf + sum + i + 1, --n - i);
1169 sum += n;
1171 if (ferror (in))
1172 error (EXIT_FAILURE, errno, _("read error"));
1174 while (sum < BASE_LENGTH (DEC_BLOCKSIZE) && !feof (in));
1176 /* The following "loop" is usually iterated just once.
1177 However, when it processes the final input buffer, we want
1178 to iterate it one additional time, but with an indicator
1179 telling it to flush what is in CTX. */
1180 for (int k = 0; k < 1 + !!feof (in); k++)
1182 if (k == 1)
1184 if (ctx.i == 0)
1185 break;
1187 /* auto pad input (at eof). */
1188 idx_t auto_padding = REQUIRED_PADDING (ctx.i);
1189 if (auto_padding && (sum == 0 || inbuf[sum - 1] != '='))
1191 affirm (auto_padding <= sizeof (padbuf));
1192 IF_LINT (free (inbuf));
1193 sum = auto_padding;
1194 inbuf = padbuf;
1196 else
1197 sum = 0; /* process ctx buffer only */
1199 idx_t n = DEC_BLOCKSIZE;
1200 ok = base_decode_ctx (&ctx, inbuf, sum, outbuf, &n);
1202 if (fwrite (outbuf, 1, n, out) < n)
1203 write_error ();
1205 if (!ok)
1206 error (EXIT_FAILURE, 0, _("invalid input"));
1209 while (!feof (in));
1211 finish_and_exit (in, infile);
1215 main (int argc, char **argv)
1217 int opt;
1218 FILE *input_fh;
1219 char const *infile;
1221 /* True if --decode has been given and we should decode data. */
1222 bool decode = false;
1223 /* True if we should ignore non-base-alphabetic characters. */
1224 bool ignore_garbage = false;
1225 /* Wrap encoded data around the 76th column, by default. */
1226 idx_t wrap_column = 76;
1228 #if BASE_TYPE == 42
1229 int base_type = 0;
1230 #endif
1232 initialize_main (&argc, &argv);
1233 set_program_name (argv[0]);
1234 setlocale (LC_ALL, "");
1235 bindtextdomain (PACKAGE, LOCALEDIR);
1236 textdomain (PACKAGE);
1238 atexit (close_stdout);
1240 while ((opt = getopt_long (argc, argv, "diw:", long_options, nullptr)) != -1)
1241 switch (opt)
1243 case 'd':
1244 decode = true;
1245 break;
1247 case 'w':
1249 intmax_t w;
1250 strtol_error s_err = xstrtoimax (optarg, nullptr, 10, &w, "");
1251 if (LONGINT_OVERFLOW < s_err || w < 0)
1252 error (EXIT_FAILURE, 0, "%s: %s",
1253 _("invalid wrap size"), quote (optarg));
1254 wrap_column = s_err == LONGINT_OVERFLOW || IDX_MAX < w ? 0 : w;
1256 break;
1258 case 'i':
1259 ignore_garbage = true;
1260 break;
1262 #if BASE_TYPE == 42
1263 case BASE64_OPTION:
1264 case BASE64URL_OPTION:
1265 case BASE32_OPTION:
1266 case BASE32HEX_OPTION:
1267 case BASE16_OPTION:
1268 case BASE2MSBF_OPTION:
1269 case BASE2LSBF_OPTION:
1270 case Z85_OPTION:
1271 base_type = opt;
1272 break;
1273 #endif
1275 case_GETOPT_HELP_CHAR;
1277 case_GETOPT_VERSION_CHAR (PROGRAM_NAME, AUTHORS);
1279 default:
1280 usage (EXIT_FAILURE);
1281 break;
1284 #if BASE_TYPE == 42
1285 switch (base_type)
1287 case BASE64_OPTION:
1288 base_length = base64_length_wrapper;
1289 required_padding = base64_required_padding;
1290 isubase = isubase64;
1291 base_encode = base64_encode;
1292 base_decode_ctx_init = base64_decode_ctx_init_wrapper;
1293 base_decode_ctx = base64_decode_ctx_wrapper;
1294 break;
1296 case BASE64URL_OPTION:
1297 base_length = base64_length_wrapper;
1298 required_padding = base64_required_padding;
1299 isubase = isubase64url;
1300 base_encode = base64url_encode;
1301 base_decode_ctx_init = base64url_decode_ctx_init_wrapper;
1302 base_decode_ctx = base64url_decode_ctx_wrapper;
1303 break;
1305 case BASE32_OPTION:
1306 base_length = base32_length_wrapper;
1307 required_padding = base32_required_padding;
1308 isubase = isubase32;
1309 base_encode = base32_encode;
1310 base_decode_ctx_init = base32_decode_ctx_init_wrapper;
1311 base_decode_ctx = base32_decode_ctx_wrapper;
1312 break;
1314 case BASE32HEX_OPTION:
1315 base_length = base32_length_wrapper;
1316 required_padding = base32_required_padding;
1317 isubase = isubase32hex;
1318 base_encode = base32hex_encode;
1319 base_decode_ctx_init = base32hex_decode_ctx_init_wrapper;
1320 base_decode_ctx = base32hex_decode_ctx_wrapper;
1321 break;
1323 case BASE16_OPTION:
1324 base_length = base16_length;
1325 required_padding = no_required_padding;
1326 isubase = isubase16;
1327 base_encode = base16_encode;
1328 base_decode_ctx_init = base16_decode_ctx_init;
1329 base_decode_ctx = base16_decode_ctx;
1330 break;
1332 case BASE2MSBF_OPTION:
1333 base_length = base2_length;
1334 required_padding = no_required_padding;
1335 isubase = isubase2;
1336 base_encode = base2msbf_encode;
1337 base_decode_ctx_init = base2_decode_ctx_init;
1338 base_decode_ctx = base2msbf_decode_ctx;
1339 break;
1341 case BASE2LSBF_OPTION:
1342 base_length = base2_length;
1343 required_padding = no_required_padding;
1344 isubase = isubase2;
1345 base_encode = base2lsbf_encode;
1346 base_decode_ctx_init = base2_decode_ctx_init;
1347 base_decode_ctx = base2lsbf_decode_ctx;
1348 break;
1350 case Z85_OPTION:
1351 base_length = z85_length;
1352 required_padding = no_required_padding;
1353 isubase = isuz85;
1354 base_encode = z85_encode;
1355 base_decode_ctx_init = z85_decode_ctx_init;
1356 base_decode_ctx = z85_decode_ctx;
1357 break;
1359 default:
1360 error (0, 0, _("missing encoding type"));
1361 usage (EXIT_FAILURE);
1363 #endif
1365 if (argc - optind > 1)
1367 error (0, 0, _("extra operand %s"), quote (argv[optind + 1]));
1368 usage (EXIT_FAILURE);
1371 if (optind < argc)
1372 infile = argv[optind];
1373 else
1374 infile = "-";
1376 if (STREQ (infile, "-"))
1378 xset_binary_mode (STDIN_FILENO, O_BINARY);
1379 input_fh = stdin;
1381 else
1383 input_fh = fopen (infile, "rb");
1384 if (input_fh == nullptr)
1385 error (EXIT_FAILURE, errno, "%s", quotef (infile));
1388 fadvise (input_fh, FADVISE_SEQUENTIAL);
1390 if (decode)
1391 do_decode (input_fh, infile, stdout, ignore_garbage);
1392 else
1393 do_encode (input_fh, infile, stdout, wrap_column);