tests: cleanup trapping of signal names
[coreutils.git] / src / numfmt.c
blob109947e20253eecb17e2e9e739141b228e7637aa
1 /* Reformat numbers like 11505426432 to the more human-readable 11G
2 Copyright (C) 2012-2015 Free Software Foundation, Inc.
4 This program is free software: you can redistribute it and/or modify
5 it under the terms of the GNU General Public License as published by
6 the Free Software Foundation, either version 3 of the License, or
7 (at your option) any later version.
9 This program is distributed in the hope that it will be useful,
10 but WITHOUT ANY WARRANTY; without even the implied warranty of
11 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 GNU General Public License for more details.
14 You should have received a copy of the GNU General Public License
15 along with this program. If not, see <http://www.gnu.org/licenses/>. */
17 #include <config.h>
18 #include <float.h>
19 #include <getopt.h>
20 #include <stdio.h>
21 #include <sys/types.h>
22 #include <langinfo.h>
24 #include "mbsalign.h"
25 #include "argmatch.h"
26 #include "c-ctype.h"
27 #include "error.h"
28 #include "quote.h"
29 #include "system.h"
30 #include "xstrtol.h"
31 #include "xstrndup.h"
33 #include "set-fields.h"
35 #if HAVE_FPSETPREC
36 # include <ieeefp.h>
37 #endif
/* The official name of this program (e.g., no 'g' prefix).  */
#define PROGRAM_NAME "numfmt"

#define AUTHORS proper_name ("Assaf Gordon")

/* Exit code when some numbers fail to convert.  */
enum { EXIT_CONVERSION_WARNINGS = 2 };

/* Identifiers for the long-only command line options.  Numbering
   starts just above CHAR_MAX so that no value can be mistaken for a
   single-character option.  */
enum
{
  FROM_OPTION = CHAR_MAX + 1,
  FROM_UNIT_OPTION,
  TO_OPTION,
  TO_UNIT_OPTION,
  ROUND_OPTION,
  SUFFIX_OPTION,
  GROUPING_OPTION,
  PADDING_OPTION,
  FIELD_OPTION,
  DEBUG_OPTION,
  DEV_DEBUG_OPTION,
  HEADER_OPTION,
  FORMAT_OPTION,
  INVALID_OPTION
};
/* Unit systems a number can be scaled from or to.  */
enum scale_type
{
  scale_none,                   /* the default: no scaling.  */
  scale_auto,                   /* --from only.  */
  scale_SI,
  scale_IEC,
  scale_IEC_I                   /* 'i' suffix is required.  */
};

/* Spellings accepted when scaling input.  Entry N of scale_from_types
   is the value that entry N of scale_from_args stands for.  */
static char const *const scale_from_args[] =
{
  "none",
  "auto",
  "si",
  "iec",
  "iec-i",
  NULL
};

static enum scale_type const scale_from_types[] =
{
  scale_none,
  scale_auto,
  scale_SI,
  scale_IEC,
  scale_IEC_I
};

/* Spellings accepted when scaling output ("auto" is not offered),
   paired index-by-index with scale_to_types.  */
static char const *const scale_to_args[] =
{
  "none",
  "si",
  "iec",
  "iec-i",
  NULL
};

static enum scale_type const scale_to_types[] =
{
  scale_none,
  scale_SI,
  scale_IEC,
  scale_IEC_I
};
/* Rounding methods.  */
enum round_type
{
  round_ceiling,
  round_floor,
  round_from_zero,
  round_to_zero,
  round_nearest
};

/* User-visible names of the rounding methods.  Entry N of round_types
   is the method that entry N of round_args stands for.  */
static char const *const round_args[] =
{
  "up",
  "down",
  "from-zero",
  "towards-zero",
  "nearest",
  NULL
};

static enum round_type const round_types[] =
{
  round_ceiling,
  round_floor,
  round_from_zero,
  round_to_zero,
  round_nearest
};
/* What to do when an input number cannot be converted.  */
enum inval_type
{
  inval_abort,
  inval_fail,
  inval_warn,
  inval_ignore
};

/* User-visible names of the failure modes.  Entry N of inval_types is
   the mode that entry N of inval_args stands for.  */
static char const *const inval_args[] =
{
  "abort",
  "fail",
  "warn",
  "ignore",
  NULL
};

static enum inval_type const inval_types[] =
{
  inval_abort,
  inval_fail,
  inval_warn,
  inval_ignore
};
133 static struct option const longopts[] =
135 {"from", required_argument, NULL, FROM_OPTION},
136 {"from-unit", required_argument, NULL, FROM_UNIT_OPTION},
137 {"to", required_argument, NULL, TO_OPTION},
138 {"to-unit", required_argument, NULL, TO_UNIT_OPTION},
139 {"round", required_argument, NULL, ROUND_OPTION},
140 {"padding", required_argument, NULL, PADDING_OPTION},
141 {"suffix", required_argument, NULL, SUFFIX_OPTION},
142 {"grouping", no_argument, NULL, GROUPING_OPTION},
143 {"delimiter", required_argument, NULL, 'd'},
144 {"field", required_argument, NULL, FIELD_OPTION},
145 {"debug", no_argument, NULL, DEBUG_OPTION},
146 {"-debug", no_argument, NULL, DEV_DEBUG_OPTION},
147 {"header", optional_argument, NULL, HEADER_OPTION},
148 {"format", required_argument, NULL, FORMAT_OPTION},
149 {"invalid", required_argument, NULL, INVALID_OPTION},
150 {GETOPT_HELP_OPTION_DECL},
151 {GETOPT_VERSION_OPTION_DECL},
152 {NULL, 0, NULL, 0}
155 /* If delimiter has this value, blanks separate fields. */
156 enum { DELIMITER_DEFAULT = CHAR_MAX + 1 };
158 /* Maximum number of digits we can safely handle
159 without precision loss, if scaling is 'none'. */
160 enum { MAX_UNSCALED_DIGITS = LDBL_DIG };
162 /* Maximum number of digits we can work with.
163 This is equivalent to 999Y.
164 NOTE: 'long double' can handle more than that, but there's
165 no official suffix assigned beyond Yotta (1000^8). */
166 enum { MAX_ACCEPTABLE_DIGITS = 27 };
168 static enum scale_type scale_from = scale_none;
169 static enum scale_type scale_to = scale_none;
170 static enum round_type round_style = round_from_zero;
171 static enum inval_type inval_style = inval_abort;
172 static const char *suffix = NULL;
173 static uintmax_t from_unit_size = 1;
174 static uintmax_t to_unit_size = 1;
175 static int grouping = 0;
176 static char *padding_buffer = NULL;
177 static size_t padding_buffer_size = 0;
178 static long int padding_width = 0;
179 static long int zero_padding_width = 0;
180 static long int user_precision = -1;
181 static const char *format_str = NULL;
182 static char *format_str_prefix = NULL;
183 static char *format_str_suffix = NULL;
185 /* By default, any conversion error will terminate the program. */
186 static int conv_exit_code = EXIT_CONVERSION_WARNINGS;
189 /* auto-pad each line based on skipped whitespace. */
190 static int auto_padding = 0;
191 static mbs_align_t padding_alignment = MBS_ALIGN_RIGHT;
192 static int delimiter = DELIMITER_DEFAULT;
194 /* if non-zero, the first 'header' lines from STDIN are skipped. */
195 static uintmax_t header = 0;
197 /* Debug for users: print warnings to STDERR about possible
198 error (similar to sort's debug). */
199 static bool debug;
201 /* will be set according to the current locale. */
202 static const char *decimal_point;
203 static int decimal_point_length;
205 /* debugging for developers. Enables devmsg(). */
206 static bool dev_debug = false;
208 static inline int
209 default_scale_base (enum scale_type scale)
211 switch (scale)
213 case scale_IEC:
214 case scale_IEC_I:
215 return 1024;
217 case scale_none:
218 case scale_auto:
219 case scale_SI:
220 default:
221 return 1000;
/* Return nonzero if SUF is one of the magnitude letters
   K, M, G, T, P, E, Z or Y, and zero otherwise.

   The NUL byte is rejected explicitly: strchr treats the terminating
   null as part of the searched string, so without this check an empty
   suffix would be reported as valid and the caller would step past the
   end of its input.  */
static inline int
valid_suffix (const char suf)
{
  static char const valid_suffixes[] = "KMGTPEZY";
  return suf != '\0' && strchr (valid_suffixes, suf) != NULL;
}
/* Return the exponent that suffix letter SUF stands for:
   K=1 (kilo/kibi), M=2 (mega/mebi), G=3 (giga/gibi), T=4 (tera/tebi),
   P=5 (peta/pebi), E=6 (exa/exbi), Z=7 (zetta/2**70), Y=8 (yotta/2**80).
   Any other character, including NUL, yields 0.  */
static inline int
suffix_power (const char suf)
{
  /* The position of a letter in this string is its power minus one.  */
  static char const letters[] = "KMGTPEZY";

  /* strchr would match the terminator, so handle NUL up front.  */
  if (suf == '\0')
    return 0;

  char const *hit = strchr (letters, suf);
  if (hit == NULL)
    return 0;

  return (int) (hit - letters) + 1;
}
/* Return the suffix letter for POWER as a string: "" for 0, then
   "K", "M", "G", "T", "P", "E", "Z", "Y" for 1 through 8.
   Any larger POWER yields "(error)".  */
static inline const char *
suffix_power_char (unsigned int power)
{
  static char const *const names[] =
    {
      "", "K", "M", "G", "T", "P", "E", "Z", "Y"
    };

  if (power < sizeof names / sizeof names[0])
    return names[power];

  return "(error)";
}
/* Similar to 'powl(3)' but without requiring 'libm'.
   Return BASE raised to the non-negative integer power X by repeated
   multiplication; X == 0 yields 1.  */
static long double
powerld (long double base, unsigned int x)
{
  long double product = 1;

  /* TODO: check for overflow, inf?  */
  for (unsigned int i = 0; i < x; i++)
    product *= base;

  return product;
}
/* Similar to 'fabs(3)' but without requiring 'libm'.
   Return the magnitude of VAL.  */
static inline long double
absld (long double val)
{
  if (val < 0)
    return -val;

  return val;
}
/* Scale down VAL: divide it by BASE until its magnitude is below BASE.
   Return the scaled value and, when X is non-NULL, store the number of
   divisions in *X, so that  result * BASE^X == original VAL.
   Similar to "frexpl(3)" but without requiring 'libm',
   allowing only integer scale, limited functionality and error checking.
   A VAL outside [-LDBL_MAX, LDBL_MAX] (infinity, NaN) is returned
   unchanged with a power of 0.  */
static long double
expld (long double val, unsigned int base, unsigned int /*output */ *x)
{
  unsigned int divisions = 0;

  if (-LDBL_MAX <= val && val <= LDBL_MAX)
    for (; (val < 0 ? -val : val) >= base; val /= base)
      divisions++;

  if (x != NULL)
    *x = divisions;

  return val;
}
/* EXTREMELY limited 'ceil' - without 'libm'.
   Assumes values that fit in intmax_t.
   Return the smallest integer that is not less than VAL.  */
static inline intmax_t
simple_round_ceiling (long double val)
{
  /* The conversion truncates toward zero, so only a VAL with a
     positive fractional part ends up below VAL and needs a bump.  */
  intmax_t truncated = val;

  return truncated < val ? truncated + 1 : truncated;
}
/* EXTREMELY limited 'floor' - without 'libm'.
   Assumes values that fit in intmax_t.
   Implemented as the negated ceiling of -VAL.  */
static inline intmax_t
simple_round_floor (long double val)
{
  intmax_t mirrored = simple_round_ceiling (-val);

  return -mirrored;
}
/* EXTREMELY limited 'round away from zero'.
   Assumes values that fit in intmax_t.
   Negative values are rounded down, all others are rounded up.  */
static inline intmax_t
simple_round_from_zero (long double val)
{
  if (val < 0)
    return simple_round_floor (val);

  return simple_round_ceiling (val);
}
/* EXTREMELY limited 'round toward zero'.
   Assumes values that fit in intmax_t.
   Relies on floating-to-integer conversion discarding the fraction.  */
static inline intmax_t
simple_round_to_zero (long double val)
{
  return (intmax_t) val;
}
/* EXTREMELY limited 'round' - without 'libm'.
   Assumes values that fit in intmax_t.
   Moves VAL half a unit away from zero, then truncates; ties therefore
   go away from zero.  */
static inline intmax_t
simple_round_nearest (long double val)
{
  if (val < 0)
    return (intmax_t) (val - 0.5);

  return (intmax_t) (val + 0.5);
}
391 static inline long double _GL_ATTRIBUTE_CONST
392 simple_round (long double val, enum round_type t)
394 intmax_t rval;
395 intmax_t intmax_mul = val / INTMAX_MAX;
396 val -= (long double) INTMAX_MAX * intmax_mul;
398 switch (t)
400 case round_ceiling:
401 rval = simple_round_ceiling (val);
402 break;
404 case round_floor:
405 rval = simple_round_floor (val);
406 break;
408 case round_from_zero:
409 rval = simple_round_from_zero (val);
410 break;
412 case round_to_zero:
413 rval = simple_round_to_zero (val);
414 break;
416 case round_nearest:
417 rval = simple_round_nearest (val);
418 break;
420 default:
421 /* to silence the compiler - this should never happen. */
422 return 0;
425 return (long double) INTMAX_MAX * intmax_mul + rval;
/* Result codes of the simple_strtod_* parsers.  The first two mean a
   value was produced; all others are failures.  */
enum simple_strtod_error
{
  SSE_OK = 0,
  SSE_OK_PRECISION_LOSS,
  SSE_OVERFLOW,
  SSE_INVALID_NUMBER,

  /* the following are returned by 'simple_strtod_human'.  */
  SSE_VALID_BUT_FORBIDDEN_SUFFIX,
  SSE_INVALID_SUFFIX,
  SSE_MISSING_I_SUFFIX
};
441 /* Read an *integer* INPUT_STR,
442 but return the integer value in a 'long double' VALUE
443 hence, no UINTMAX_MAX limitation.
444 NEGATIVE is updated, and is stored separately from the VALUE
445 so that signbit() isn't required to determine the sign of -0..
446 ENDPTR is required (unlike strtod) and is used to store a pointer
447 to the character after the last character used in the conversion.
449 Note locale'd grouping is not supported,
450 nor is skipping of white-space supported.
452 Returns:
453 SSE_OK - valid number.
454 SSE_OK_PRECISION_LOSS - if more than 18 digits were used.
455 SSE_OVERFLOW - if more than 27 digits (999Y) were used.
456 SSE_INVALID_NUMBER - if no digits were found. */
457 static enum simple_strtod_error
458 simple_strtod_int (const char *input_str,
459 char **endptr, long double *value, bool *negative)
461 enum simple_strtod_error e = SSE_OK;
463 long double val = 0;
464 unsigned int digits = 0;
465 bool found_digit = false;
467 if (*input_str == '-')
469 input_str++;
470 *negative = true;
472 else
473 *negative = false;
475 *endptr = (char *) input_str;
476 while (*endptr && c_isdigit (**endptr))
478 int digit = (**endptr) - '0';
480 found_digit = true;
482 if (val || digit)
483 digits++;
485 if (digits > MAX_UNSCALED_DIGITS)
486 e = SSE_OK_PRECISION_LOSS;
488 if (digits > MAX_ACCEPTABLE_DIGITS)
489 return SSE_OVERFLOW;
491 val *= 10;
492 val += digit;
494 ++(*endptr);
496 if (! found_digit
497 && ! STREQ_LEN (*endptr, decimal_point, decimal_point_length))
498 return SSE_INVALID_NUMBER;
499 if (*negative)
500 val = -val;
502 if (value)
503 *value = val;
505 return e;
508 /* Read a floating-point INPUT_STR represented as "NNNN[.NNNNN]",
509 and return the value in a 'long double' VALUE.
510 ENDPTR is required (unlike strtod) and is used to store a pointer
511 to the character after the last character used in the conversion.
512 PRECISION is optional and used to indicate fractions are present.
514 Note locale'd grouping is not supported,
515 nor is skipping of white-space supported.
517 Returns:
518 SSE_OK - valid number.
519 SSE_OK_PRECISION_LOSS - if more than 18 digits were used.
520 SSE_OVERFLOW - if more than 27 digits (999Y) were used.
521 SSE_INVALID_NUMBER - if no digits were found. */
522 static enum simple_strtod_error
523 simple_strtod_float (const char *input_str,
524 char **endptr,
525 long double *value,
526 size_t *precision)
528 bool negative;
529 enum simple_strtod_error e = SSE_OK;
531 if (precision)
532 *precision = 0;
534 /* TODO: accept locale'd grouped values for the integral part. */
535 e = simple_strtod_int (input_str, endptr, value, &negative);
536 if (e != SSE_OK && e != SSE_OK_PRECISION_LOSS)
537 return e;
539 /* optional decimal point + fraction. */
540 if (STREQ_LEN (*endptr, decimal_point, decimal_point_length))
542 char *ptr2;
543 long double val_frac = 0;
544 bool neg_frac;
546 (*endptr) += decimal_point_length;
547 enum simple_strtod_error e2 =
548 simple_strtod_int (*endptr, &ptr2, &val_frac, &neg_frac);
549 if (e2 != SSE_OK && e2 != SSE_OK_PRECISION_LOSS)
550 return e2;
551 if (e2 == SSE_OK_PRECISION_LOSS)
552 e = e2; /* propagate warning. */
553 if (neg_frac)
554 return SSE_INVALID_NUMBER;
556 /* number of digits in the fractions. */
557 size_t exponent = ptr2 - *endptr;
559 val_frac = ((long double) val_frac) / powerld (10, exponent);
561 /* TODO: detect loss of precision (only really 18 digits
562 of precision across all digits (before and after '.')). */
563 if (value)
565 if (negative)
566 *value -= val_frac;
567 else
568 *value += val_frac;
571 if (precision)
572 *precision = exponent;
574 *endptr = ptr2;
576 return e;
579 /* Read a 'human' INPUT_STR represented as "NNNN[.NNNNN] + suffix",
580 and return the value in a 'long double' VALUE,
581 with the precision of the input returned in PRECISION.
582 ENDPTR is required (unlike strtod) and is used to store a pointer
583 to the character after the last character used in the conversion.
584 ALLOWED_SCALING determines the scaling supported.
586 TODO:
587 support locale'd grouping
588 accept scentific and hex floats (probably use strtold directly)
590 Returns:
591 SSE_OK - valid number.
592 SSE_OK_PRECISION_LOSS - if more than LDBL_DIG digits were used.
593 SSE_OVERFLOW - if more than 27 digits (999Y) were used.
594 SSE_INVALID_NUMBER - if no digits were found.
595 SSE_VALID_BUT_FORBIDDEN_SUFFIX
596 SSE_INVALID_SUFFIX
597 SSE_MISSING_I_SUFFIX */
598 static enum simple_strtod_error
599 simple_strtod_human (const char *input_str,
600 char **endptr, long double *value, size_t *precision,
601 enum scale_type allowed_scaling)
603 int power = 0;
604 /* 'scale_auto' is checked below. */
605 int scale_base = default_scale_base (allowed_scaling);
607 devmsg ("simple_strtod_human:\n input string: %s\n"
608 " locale decimal-point: %s\n"
609 " MAX_UNSCALED_DIGITS: %d\n",
610 quote_n (0, input_str),
611 quote_n (1, decimal_point),
612 MAX_UNSCALED_DIGITS);
614 enum simple_strtod_error e =
615 simple_strtod_float (input_str, endptr, value, precision);
616 if (e != SSE_OK && e != SSE_OK_PRECISION_LOSS)
617 return e;
619 devmsg (" parsed numeric value: %Lf\n"
620 " input precision = %d\n", *value, (int)*precision);
622 if (**endptr != '\0')
624 /* process suffix. */
626 /* Skip any blanks between the number and suffix. */
627 while (isblank (to_uchar (**endptr)))
628 (*endptr)++;
630 if (!valid_suffix (**endptr))
631 return SSE_INVALID_SUFFIX;
633 if (allowed_scaling == scale_none)
634 return SSE_VALID_BUT_FORBIDDEN_SUFFIX;
636 power = suffix_power (**endptr);
637 (*endptr)++; /* skip first suffix character. */
639 if (allowed_scaling == scale_auto && **endptr == 'i')
641 /* auto-scaling enabled, and the first suffix character
642 is followed by an 'i' (e.g. Ki, Mi, Gi). */
643 scale_base = 1024;
644 (*endptr)++; /* skip second ('i') suffix character. */
645 devmsg (" Auto-scaling, found 'i', switching to base %d\n",
646 scale_base);
649 *precision = 0; /* Reset, to select precision based on scale. */
652 if (allowed_scaling == scale_IEC_I)
654 if (**endptr == 'i')
655 (*endptr)++;
656 else
657 return SSE_MISSING_I_SUFFIX;
660 long double multiplier = powerld (scale_base, power);
662 devmsg (" suffix power=%d^%d = %Lf\n", scale_base, power, multiplier);
664 /* TODO: detect loss of precision and overflows. */
665 (*value) = (*value) * multiplier;
667 devmsg (" returning value: %Lf (%LG)\n", *value, *value);
669 return e;
673 static void
674 simple_strtod_fatal (enum simple_strtod_error err, char const *input_str)
676 char const *msgid = NULL;
678 switch (err)
680 case SSE_OK_PRECISION_LOSS:
681 case SSE_OK:
682 /* should never happen - this function isn't called when OK. */
683 abort ();
685 case SSE_OVERFLOW:
686 msgid = N_("value too large to be converted: %s");
687 break;
689 case SSE_INVALID_NUMBER:
690 msgid = N_("invalid number: %s");
691 break;
693 case SSE_VALID_BUT_FORBIDDEN_SUFFIX:
694 msgid = N_("rejecting suffix in input: %s (consider using --from)");
695 break;
697 case SSE_INVALID_SUFFIX:
698 msgid = N_("invalid suffix in input: %s");
699 break;
701 case SSE_MISSING_I_SUFFIX:
702 msgid = N_("missing 'i' suffix in input: %s (e.g Ki/Mi/Gi)");
703 break;
707 if (inval_style != inval_ignore)
708 error (conv_exit_code, 0, gettext (msgid), quote (input_str));
711 /* Convert VAL to a human format string in BUF. */
712 static void
713 double_to_human (long double val, int precision,
714 char *buf, size_t buf_size,
715 enum scale_type scale, int group, enum round_type round)
717 int num_size;
718 char fmt[64];
719 verify (sizeof (fmt) > (INT_BUFSIZE_BOUND (zero_padding_width)
720 + INT_BUFSIZE_BOUND (precision)
721 + 10 /* for %.Lf etc. */));
723 char *pfmt = fmt;
724 *pfmt++ = '%';
726 if (group)
727 *pfmt++ = '\'';
729 if (zero_padding_width)
730 pfmt += snprintf (pfmt, sizeof (fmt) - 2, "0%ld", zero_padding_width);
732 devmsg ("double_to_human:\n");
734 if (scale == scale_none)
736 val *= powerld (10, precision);
737 val = simple_round (val, round);
738 val /= powerld (10, precision);
740 devmsg ((group) ?
741 " no scaling, returning (grouped) value: %'.*Lf\n" :
742 " no scaling, returning value: %.*Lf\n", precision, val);
744 stpcpy (pfmt, ".*Lf");
746 num_size = snprintf (buf, buf_size, fmt, precision, val);
747 if (num_size < 0 || num_size >= (int) buf_size)
748 error (EXIT_FAILURE, 0,
749 _("failed to prepare value '%Lf' for printing"), val);
750 return;
753 /* Scaling requested by user. */
754 double scale_base = default_scale_base (scale);
756 /* Normalize val to scale. */
757 unsigned int power = 0;
758 val = expld (val, scale_base, &power);
759 devmsg (" scaled value to %Lf * %0.f ^ %u\n", val, scale_base, power);
761 /* Perform rounding. */
762 unsigned int power_adjust = 0;
763 if (user_precision != -1)
764 power_adjust = MIN (power * 3, user_precision);
765 else if (absld (val) < 10)
767 /* for values less than 10, we allow one decimal-point digit,
768 so adjust before rounding. */
769 power_adjust = 1;
772 val *= powerld (10, power_adjust);
773 val = simple_round (val, round);
774 val /= powerld (10, power_adjust);
776 /* two special cases after rounding:
777 1. a "999.99" can turn into 1000 - so scale down
778 2. a "9.99" can turn into 10 - so don't display decimal-point. */
779 if (absld (val) >= scale_base)
781 val /= scale_base;
782 power++;
785 /* should "7.0" be printed as "7" ?
786 if removing the ".0" is preferred, enable the fourth condition. */
787 int show_decimal_point = (val != 0) && (absld (val) < 10) && (power > 0);
788 /* && (absld (val) > simple_round_floor (val))) */
790 devmsg (" after rounding, value=%Lf * %0.f ^ %u\n", val, scale_base, power);
792 stpcpy (pfmt, ".*Lf%s");
794 int prec = user_precision == -1 ? show_decimal_point : user_precision;
796 /* buf_size - 1 used here to ensure place for possible scale_IEC_I suffix. */
797 num_size = snprintf (buf, buf_size - 1, fmt, prec, val,
798 suffix_power_char (power));
799 if (num_size < 0 || num_size >= (int) buf_size - 1)
800 error (EXIT_FAILURE, 0,
801 _("failed to prepare value '%Lf' for printing"), val);
803 if (scale == scale_IEC_I && power > 0)
804 strncat (buf, "i", buf_size - num_size - 1);
806 devmsg (" returning value: %s\n", quote (buf));
808 return;
811 /* Convert a string of decimal digits, N_STRING, with an optional suffix
812 to an integral value. Suffixes are handled as with --from=auto.
813 Upon successful conversion, return that value.
814 If it cannot be converted, give a diagnostic and exit. */
815 static uintmax_t
816 unit_to_umax (const char *n_string)
818 strtol_error s_err;
819 const char *c_string = n_string;
820 char *t_string = NULL;
821 size_t n_len = strlen (n_string);
822 char *end = NULL;
823 uintmax_t n;
824 const char *suffixes = "KMGTPEZY";
826 /* Adjust suffixes so K=1000, Ki=1024, KiB=invalid. */
827 if (n_len && ! c_isdigit (n_string[n_len - 1]))
829 t_string = xmalloc (n_len + 2);
830 end = t_string + n_len - 1;
831 memcpy (t_string, n_string, n_len);
833 if (*end == 'i' && 2 <= n_len && ! c_isdigit (*(end - 1)))
834 *end = '\0';
835 else
837 *++end = 'B';
838 *++end = '\0';
839 suffixes = "KMGTPEZY0";
842 c_string = t_string;
845 s_err = xstrtoumax (c_string, &end, 10, &n, suffixes);
847 if (s_err != LONGINT_OK || *end || n == 0)
849 free (t_string);
850 error (EXIT_FAILURE, 0, _("invalid unit size: %s"), quote (n_string));
853 free (t_string);
855 return n;
859 static void
860 setup_padding_buffer (size_t min_size)
862 if (padding_buffer_size > min_size)
863 return;
865 padding_buffer_size = min_size + 1;
866 padding_buffer = xrealloc (padding_buffer, padding_buffer_size);
/* Print usage information, then exit with STATUS.
   When STATUS is not EXIT_SUCCESS only emit_try_help () is called;
   otherwise the full help text (options, UNIT/FIELDS/FORMAT notes,
   exit status summary and examples) is written to stdout.
   This function does not return.  */
869 void
870 usage (int status)
872 if (status != EXIT_SUCCESS)
873 emit_try_help ();
874 else
876 printf (_("\
877 Usage: %s [OPTION]... [NUMBER]...\n\
878 "), program_name);
879 fputs (_("\
880 Reformat NUMBER(s), or the numbers from standard input if none are specified.\n\
881 "), stdout);
882 emit_mandatory_arg_note ();
883 fputs (_("\
884 --debug print warnings about invalid input\n\
885 "), stdout);
886 fputs (_("\
887 -d, --delimiter=X use X instead of whitespace for field delimiter\n\
888 "), stdout);
889 fputs (_("\
890 --field=FIELDS replace the numbers in these input fields (default=1)\n\
891 see FIELDS below\n\
892 "), stdout);
893 fputs (_("\
894 --format=FORMAT use printf style floating-point FORMAT;\n\
895 see FORMAT below for details\n\
896 "), stdout);
897 fputs (_("\
898 --from=UNIT auto-scale input numbers to UNITs; default is 'none';\n\
899 see UNIT below\n\
900 "), stdout);
901 fputs (_("\
902 --from-unit=N specify the input unit size (instead of the default 1)\n\
903 "), stdout);
904 fputs (_("\
905 --grouping use locale-defined grouping of digits, e.g. 1,000,000\n\
906 (which means it has no effect in the C/POSIX locale)\n\
907 "), stdout);
908 fputs (_("\
909 --header[=N] print (without converting) the first N header lines;\n\
910 N defaults to 1 if not specified\n\
911 "), stdout);
912 fputs (_("\
913 --invalid=MODE failure mode for invalid numbers: MODE can be:\n\
914 abort (default), fail, warn, ignore\n\
915 "), stdout);
916 fputs (_("\
917 --padding=N pad the output to N characters; positive N will\n\
918 right-align; negative N will left-align;\n\
919 padding is ignored if the output is wider than N;\n\
920 the default is to automatically pad if a whitespace\n\
921 is found\n\
922 "), stdout);
923 fputs (_("\
924 --round=METHOD use METHOD for rounding when scaling; METHOD can be:\n\
925 up, down, from-zero (default), towards-zero, nearest\n\
926 "), stdout);
927 fputs (_("\
928 --suffix=SUFFIX add SUFFIX to output numbers, and accept optional\n\
929 SUFFIX in input numbers\n\
930 "), stdout);
931 fputs (_("\
932 --to=UNIT auto-scale output numbers to UNITs; see UNIT below\n\
933 "), stdout);
934 fputs (_("\
935 --to-unit=N the output unit size (instead of the default 1)\n\
936 "), stdout);
938 fputs (HELP_OPTION_DESCRIPTION, stdout);
939 fputs (VERSION_OPTION_DESCRIPTION, stdout);
941 fputs (_("\
943 UNIT options:\n"), stdout);
944 fputs (_("\
945 none no auto-scaling is done; suffixes will trigger an error\n\
946 "), stdout);
947 fputs (_("\
948 auto accept optional single/two letter suffix:\n\
949 1K = 1000,\n\
950 1Ki = 1024,\n\
951 1M = 1000000,\n\
952 1Mi = 1048576,\n"), stdout);
953 fputs (_("\
954 si accept optional single letter suffix:\n\
955 1K = 1000,\n\
956 1M = 1000000,\n\
957 ...\n"), stdout);
958 fputs (_("\
959 iec accept optional single letter suffix:\n\
960 1K = 1024,\n\
961 1M = 1048576,\n\
962 ...\n"), stdout);
963 fputs (_("\
964 iec-i accept optional two-letter suffix:\n\
965 1Ki = 1024,\n\
966 1Mi = 1048576,\n\
967 ...\n"), stdout);
969 fputs (_("\n\
970 FIELDS supports cut(1) style field ranges:\n\
971 N N'th field, counted from 1\n\
972 N- from N'th field, to end of line\n\
973 N-M from N'th to M'th field (inclusive)\n\
974 -M from first to M'th field (inclusive)\n\
975 - all fields\n\
976 Multiple fields/ranges can be separated with commas\n\
977 "), stdout);
979 fputs (_("\n\
980 FORMAT must be suitable for printing one floating-point argument '%f'.\n\
981 Optional quote (%'f) will enable --grouping (if supported by current locale).\n\
982 Optional width value (%10f) will pad output. Optional zero (%010f) width\n\
983 will zero pad the number. Optional negative values (%-10f) will left align.\n\
984 Optional precision (%.1f) will override the input determined precision.\n\
985 "), stdout);
987 printf (_("\n\
988 Exit status is 0 if all input numbers were successfully converted.\n\
989 By default, %s will stop at the first conversion error with exit status 2.\n\
990 With --invalid='fail' a warning is printed for each conversion error\n\
991 and the exit status is 2. With --invalid='warn' each conversion error is\n\
992 diagnosed, but the exit status is 0. With --invalid='ignore' conversion\n\
993 errors are not diagnosed and the exit status is 0.\n\
994 "), program_name);
/* The examples below name the program nine times, hence the nine
   program_name arguments.  */
996 printf (_("\n\
997 Examples:\n\
998 $ %s --to=si 1000\n\
999 -> \"1.0K\"\n\
1000 $ %s --to=iec 2048\n\
1001 -> \"2.0K\"\n\
1002 $ %s --to=iec-i 4096\n\
1003 -> \"4.0Ki\"\n\
1004 $ echo 1K | %s --from=si\n\
1005 -> \"1000\"\n\
1006 $ echo 1K | %s --from=iec\n\
1007 -> \"1024\"\n\
1008 $ df -B1 | %s --header --field 2-4 --to=si\n\
1009 $ ls -l | %s --header --field 5 --to=iec\n\
1010 $ ls -lh | %s --header --field 5 --from=iec --padding=10\n\
1011 $ ls -lh | %s --header --field 5 --from=iec --format %%10f\n"),
1012 program_name, program_name, program_name,
1013 program_name, program_name, program_name,
1014 program_name, program_name, program_name);
1015 emit_ancillary_info (PROGRAM_NAME);
1017 exit (status);
1020 /* Given 'fmt' (a printf(3) compatible format string), extracts the following:
1021 1. padding (e.g. %20f)
1022 2. alignment (e.g. %-20f)
1023 3. grouping (e.g. %'f)
1025 Only a limited subset of printf(3) syntax is supported.
1027 TODO:
1028 support %e %g etc. rather than just %f
1030 NOTES:
1031 1. This function sets the global variables:
1032 padding_width, padding_alignment, grouping,
1033 format_str_prefix, format_str_suffix
1034 2. The function aborts on any errors. */
1035 static void
1036 parse_format_string (char const *fmt)
1038 size_t i;
1039 size_t prefix_len = 0;
1040 size_t suffix_pos;
1041 long int pad = 0;
1042 char *endptr = NULL;
1043 bool zero_padding = false;
1045 for (i = 0; !(fmt[i] == '%' && fmt[i + 1] != '%'); i += (fmt[i] == '%') + 1)
1047 if (!fmt[i])
1048 error (EXIT_FAILURE, 0,
1049 _("format %s has no %% directive"), quote (fmt));
1050 prefix_len++;
1053 i++;
1054 while (true)
1056 size_t skip = strspn (fmt + i, " ");
1057 i += skip;
1058 if (fmt[i] == '\'')
1060 grouping = 1;
1061 i++;
1063 else if (fmt[i] == '0')
1065 zero_padding = true;
1066 i++;
1068 else if (! skip)
1069 break;
1072 errno = 0;
1073 pad = strtol (fmt + i, &endptr, 10);
1074 if (errno == ERANGE)
1075 error (EXIT_FAILURE, 0,
1076 _("invalid format %s (width overflow)"), quote (fmt));
1078 if (endptr != (fmt + i) && pad != 0)
1080 if (debug && padding_width && !(zero_padding && pad > 0))
1081 error (0, 0, _("--format padding overriding --padding"));
1083 if (pad < 0)
1085 padding_alignment = MBS_ALIGN_LEFT;
1086 padding_width = -pad;
1088 else
1090 if (zero_padding)
1091 zero_padding_width = pad;
1092 else
1093 padding_width = pad;
1097 i = endptr - fmt;
1099 if (fmt[i] == '\0')
1100 error (EXIT_FAILURE, 0, _("format %s ends in %%"), quote (fmt));
1102 if (fmt[i] == '.')
1104 i++;
1105 errno = 0;
1106 user_precision = strtol (fmt + i, &endptr, 10);
1107 if (errno == ERANGE || user_precision < 0 || SIZE_MAX < user_precision
1108 || isblank (fmt[i]) || fmt[i] == '+')
1110 /* Note we disallow negative user_precision to be
1111 consistent with printf(1). POSIX states that
1112 negative precision is only supported (and ignored)
1113 when used with '.*f'. glibc at least will malform
1114 output when passed a direct negative precision. */
1115 error (EXIT_FAILURE, 0,
1116 _("invalid precision in format %s"), quote (fmt));
1118 i = endptr - fmt;
1121 if (fmt[i] != 'f')
1122 error (EXIT_FAILURE, 0, _("invalid format %s,"
1123 " directive must be %%[0]['][-][N][.][N]f"),
1124 quote (fmt));
1125 i++;
1126 suffix_pos = i;
1128 for (; fmt[i] != '\0'; i += (fmt[i] == '%') + 1)
1129 if (fmt[i] == '%' && fmt[i + 1] != '%')
1130 error (EXIT_FAILURE, 0, _("format %s has too many %% directives"),
1131 quote (fmt));
1133 if (prefix_len)
1134 format_str_prefix = xstrndup (fmt, prefix_len);
1135 if (fmt[suffix_pos] != '\0')
1136 format_str_suffix = xstrdup (fmt + suffix_pos);
1138 devmsg ("format String:\n input: %s\n grouping: %s\n"
1139 " padding width: %ld\n alignment: %s\n"
1140 " prefix: %s\n suffix: %s\n",
1141 quote_n (0, fmt), (grouping) ? "yes" : "no",
1142 padding_width,
1143 (padding_alignment == MBS_ALIGN_LEFT) ? "Left" : "Right",
1144 quote_n (1, format_str_prefix ? format_str_prefix : ""),
1145 quote_n (2, format_str_suffix ? format_str_suffix : ""));
1148 /* Parse a numeric value (with optional suffix) from a string.
1149 Returns a long double value, with input precision.
1151 If there's an error converting the string to value - exits with
1152 an error.
1154 If there are any trailing characters after the number
1155 (besides a valid suffix) - exits with an error. */
1156 static enum simple_strtod_error
1157 parse_human_number (const char *str, long double /*output */ *value,
1158 size_t *precision)
1160 char *ptr = NULL;
1162 enum simple_strtod_error e =
1163 simple_strtod_human (str, &ptr, value, precision, scale_from);
1164 if (e != SSE_OK && e != SSE_OK_PRECISION_LOSS)
1166 simple_strtod_fatal (e, str);
1167 return e;
1170 if (ptr && *ptr != '\0')
1172 if (inval_style != inval_ignore)
1173 error (conv_exit_code, 0, _("invalid suffix in input %s: %s"),
1174 quote_n (0, str), quote_n (1, ptr));
1175 e = SSE_INVALID_SUFFIX;
1177 return e;
1181 /* Print the given VAL, using the requested representation.
1182 The number is printed to STDOUT, with padding and alignment. */
1183 static int
1184 prepare_padded_number (const long double val, size_t precision)
1186 /* Generate Output. */
1187 char buf[128];
1189 size_t precision_used = user_precision == -1 ? precision : user_precision;
1191 /* Can't reliably print too-large values without auto-scaling. */
1192 unsigned int x;
1193 expld (val, 10, &x);
1195 if (scale_to == scale_none
1196 && x + precision_used > MAX_UNSCALED_DIGITS)
1198 if (inval_style != inval_ignore)
1200 if (precision_used)
1201 error (conv_exit_code, 0,
1202 _("value/precision too large to be printed: '%Lg/%"PRIuMAX"'"
1203 " (consider using --to)"), val, (uintmax_t)precision_used);
1204 else
1205 error (conv_exit_code, 0,
1206 _("value too large to be printed: '%Lg'"
1207 " (consider using --to)"), val);
1209 return 0;
1212 if (x > MAX_ACCEPTABLE_DIGITS - 1)
1214 if (inval_style != inval_ignore)
1215 error (conv_exit_code, 0, _("value too large to be printed: '%Lg'"
1216 " (cannot handle values > 999Y)"), val);
1217 return 0;
1220 double_to_human (val, precision_used, buf, sizeof (buf),
1221 scale_to, grouping, round_style);
1222 if (suffix)
1223 strncat (buf, suffix, sizeof (buf) - strlen (buf) -1);
1225 devmsg ("formatting output:\n value: %Lf\n humanized: %s\n",
1226 val, quote (buf));
1228 if (padding_width && strlen (buf) < padding_width)
1230 size_t w = padding_width;
1231 mbsalign (buf, padding_buffer, padding_buffer_size, &w,
1232 padding_alignment, MBA_UNIBYTE_ONLY);
1234 devmsg (" After padding: %s\n", quote (padding_buffer));
1236 else
1238 setup_padding_buffer (strlen (buf) + 1);
1239 strcpy (padding_buffer, buf);
1242 return 1;
1245 static void
1246 print_padded_number (void)
1248 if (format_str_prefix)
1249 fputs (format_str_prefix, stdout);
1251 fputs (padding_buffer, stdout);
1253 if (format_str_suffix)
1254 fputs (format_str_suffix, stdout);
1257 /* Converts the TEXT number string to the requested representation,
1258 and handles automatic suffix addition. */
1259 static int
1260 process_suffixed_number (char *text, long double *result,
1261 size_t *precision, long int field)
1263 if (suffix && strlen (text) > strlen (suffix))
1265 char *possible_suffix = text + strlen (text) - strlen (suffix);
1267 if (STREQ (suffix, possible_suffix))
1269 /* trim suffix, ONLY if it's at the end of the text. */
1270 *possible_suffix = '\0';
1271 devmsg ("trimming suffix %s\n", quote (suffix));
1273 else
1274 devmsg ("no valid suffix found\n");
1277 /* Skip white space - always. */
1278 char *p = text;
1279 while (*p && isblank (to_uchar (*p)))
1280 ++p;
1281 const unsigned int skip_count = text - p;
1283 /* setup auto-padding. */
1284 if (auto_padding)
1286 if (skip_count > 0 || field > 1)
1288 padding_width = strlen (text);
1289 setup_padding_buffer (padding_width);
1291 else
1293 padding_width = 0;
1295 devmsg ("setting Auto-Padding to %ld characters\n", padding_width);
1298 long double val = 0;
1299 enum simple_strtod_error e = parse_human_number (p, &val, precision);
1300 if (e == SSE_OK_PRECISION_LOSS && debug)
1301 error (0, 0, _("large input value %s: possible precision loss"),
1302 quote (p));
1304 if (from_unit_size != 1 || to_unit_size != 1)
1305 val = (val * from_unit_size) / to_unit_size;
1307 *result = val;
1309 return (e == SSE_OK || e == SSE_OK_PRECISION_LOSS);
1312 /* Return a pointer to the beginning of the next field in line.
1313 The line pointer is moved to the end of the next field. */
1314 static char*
1315 next_field (char **line)
1317 char *field_start = *line;
1318 char *field_end = field_start;
1320 if (delimiter != DELIMITER_DEFAULT)
1322 if (*field_start != delimiter)
1324 while (*field_end && *field_end != delimiter)
1325 ++field_end;
1327 /* else empty field */
1329 else
1331 /* keep any space prefix in the returned field */
1332 while (*field_end && isblank (to_uchar (*field_end)))
1333 ++field_end;
1335 while (*field_end && !isblank (to_uchar (*field_end)))
1336 ++field_end;
1339 *line = field_end;
1340 return field_start;
1343 static bool _GL_ATTRIBUTE_PURE
1344 include_field (size_t field)
1346 struct field_range_pair *p = frp;
1347 if (!p)
1348 return field == 1;
1350 while (p->lo != SIZE_MAX)
1352 if (p->lo <= field && p->hi >= field)
1353 return true;
1354 ++p;
1356 return false;
/* Output field number FIELD, whose text is TEXT.

   A field that is not selected for conversion is written to stdout
   unchanged.  A selected field is converted and the formatted number
   is written; if the conversion fails, TEXT is written instead.

   Return false only when a selected field could not be converted.  */
static bool
process_field (char *text, size_t field)
{
  if (! include_field (field))
    {
      fputs (text, stdout);
      return true;
    }

  long double val = 0;
  size_t precision = 0;

  /* Formatting is attempted only when parsing succeeded.  */
  bool converted = process_suffixed_number (text, &val, &precision, field);
  if (converted)
    converted = prepare_padded_number (val, precision);

  if (!converted)
    {
      fputs (text, stdout);
      return false;
    }

  print_padded_number ();
  return true;
}
1387 /* Convert number in a given line of text.
1388 NEWLINE specifies whether to output a '\n' for this "line". */
1389 static int
1390 process_line (char *line, bool newline)
1392 char *next;
1393 size_t field = 0;
1394 bool valid_number = true;
1396 while (true) {
1397 ++field;
1398 next = next_field (&line);
1400 if (*line != '\0')
1402 /* nul terminate the current field string and process */
1403 *line = '\0';
1405 if (! process_field (next, field))
1406 valid_number = false;
1408 fputc ((delimiter == DELIMITER_DEFAULT) ?
1409 ' ' : delimiter, stdout);
1410 ++line;
1412 else
1414 /* end of the line, process the last field and finish */
1415 if (! process_field (next, field))
1416 valid_number = false;
1418 break;
1422 if (newline)
1423 putchar ('\n');
1425 return valid_number;
1429 main (int argc, char **argv)
1431 int valid_numbers = 1;
1432 bool locale_ok;
1434 initialize_main (&argc, &argv);
1435 set_program_name (argv[0]);
1436 locale_ok = setlocale (LC_ALL, "");
1437 bindtextdomain (PACKAGE, LOCALEDIR);
1438 textdomain (PACKAGE);
1440 #if HAVE_FPSETPREC
1441 /* Enabled extended precision if needed. */
1442 fpsetprec (FP_PE);
1443 #endif
1445 decimal_point = nl_langinfo (RADIXCHAR);
1446 if (decimal_point == NULL || strlen (decimal_point) == 0)
1447 decimal_point = ".";
1448 decimal_point_length = strlen (decimal_point);
1450 atexit (close_stdout);
1452 while (true)
1454 int c = getopt_long (argc, argv, "d:", longopts, NULL);
1456 if (c == -1)
1457 break;
1459 switch (c)
1461 case FROM_OPTION:
1462 scale_from = XARGMATCH ("--from", optarg,
1463 scale_from_args, scale_from_types);
1464 break;
1466 case FROM_UNIT_OPTION:
1467 from_unit_size = unit_to_umax (optarg);
1468 break;
1470 case TO_OPTION:
1471 scale_to =
1472 XARGMATCH ("--to", optarg, scale_to_args, scale_to_types);
1473 break;
1475 case TO_UNIT_OPTION:
1476 to_unit_size = unit_to_umax (optarg);
1477 break;
1479 case ROUND_OPTION:
1480 round_style = XARGMATCH ("--round", optarg, round_args, round_types);
1481 break;
1483 case GROUPING_OPTION:
1484 grouping = 1;
1485 break;
1487 case PADDING_OPTION:
1488 if (xstrtol (optarg, NULL, 10, &padding_width, "") != LONGINT_OK
1489 || padding_width == 0)
1490 error (EXIT_FAILURE, 0, _("invalid padding value %s"),
1491 quote (optarg));
1492 if (padding_width < 0)
1494 padding_alignment = MBS_ALIGN_LEFT;
1495 padding_width = -padding_width;
1497 /* TODO: We probably want to apply a specific --padding
1498 to --header lines too. */
1499 break;
1501 case FIELD_OPTION:
1502 if (n_frp)
1503 error (EXIT_FAILURE, 0, _("multiple field specifications"));
1504 set_fields (optarg, SETFLD_ALLOW_DASH);
1505 break;
1507 case 'd':
1508 /* Interpret -d '' to mean 'use the NUL byte as the delimiter.' */
1509 if (optarg[0] != '\0' && optarg[1] != '\0')
1510 error (EXIT_FAILURE, 0,
1511 _("the delimiter must be a single character"));
1512 delimiter = optarg[0];
1513 break;
1515 case SUFFIX_OPTION:
1516 suffix = optarg;
1517 break;
1519 case DEBUG_OPTION:
1520 debug = true;
1521 break;
1523 case DEV_DEBUG_OPTION:
1524 dev_debug = true;
1525 debug = true;
1526 break;
1528 case HEADER_OPTION:
1529 if (optarg)
1531 if (xstrtoumax (optarg, NULL, 10, &header, "") != LONGINT_OK
1532 || header == 0)
1533 error (EXIT_FAILURE, 0, _("invalid header value %s"),
1534 quote (optarg));
1536 else
1538 header = 1;
1540 break;
1542 case FORMAT_OPTION:
1543 format_str = optarg;
1544 break;
1546 case INVALID_OPTION:
1547 inval_style = XARGMATCH ("--invalid", optarg,
1548 inval_args, inval_types);
1549 break;
1551 case_GETOPT_HELP_CHAR;
1552 case_GETOPT_VERSION_CHAR (PROGRAM_NAME, AUTHORS);
1554 default:
1555 usage (EXIT_FAILURE);
1559 if (format_str != NULL && grouping)
1560 error (EXIT_FAILURE, 0, _("--grouping cannot be combined with --format"));
1562 if (debug && ! locale_ok)
1563 error (0, 0, _("failed to set locale"));
1565 /* Warn about no-op. */
1566 if (debug && scale_from == scale_none && scale_to == scale_none
1567 && !grouping && (padding_width == 0) && (format_str == NULL))
1568 error (0, 0, _("no conversion option specified"));
1570 if (format_str)
1571 parse_format_string (format_str);
1573 if (grouping)
1575 if (scale_to != scale_none)
1576 error (EXIT_FAILURE, 0, _("grouping cannot be combined with --to"));
1577 if (debug && (strlen (nl_langinfo (THOUSEP)) == 0))
1578 error (0, 0, _("grouping has no effect in this locale"));
1582 setup_padding_buffer (padding_width);
1583 auto_padding = (padding_width == 0 && delimiter == DELIMITER_DEFAULT);
1585 if (inval_style != inval_abort)
1586 conv_exit_code = 0;
1588 if (argc > optind)
1590 if (debug && header)
1591 error (0, 0, _("--header ignored with command-line input"));
1593 for (; optind < argc; optind++)
1594 valid_numbers &= process_line (argv[optind], true);
1596 else
1598 char *line = NULL;
1599 size_t line_allocated = 0;
1600 ssize_t len;
1602 while (header-- && getline (&line, &line_allocated, stdin) > 0)
1603 fputs (line, stdout);
1605 while ((len = getline (&line, &line_allocated, stdin)) > 0)
1607 bool newline = line[len - 1] == '\n';
1608 if (newline)
1609 line[len - 1] = '\0';
1610 valid_numbers &= process_line (line, newline);
1613 IF_LINT (free (line));
1615 if (ferror (stdin))
1616 error (0, errno, _("error reading input"));
1619 #ifdef lint
1620 free (padding_buffer);
1621 free (format_str_prefix);
1622 free (format_str_suffix);
1623 reset_fields ();
1624 #endif
1626 if (debug && !valid_numbers)
1627 error (0, 0, _("failed to convert some of the input numbers"));
1629 int exit_status = EXIT_SUCCESS;
1630 if (!valid_numbers
1631 && inval_style != inval_warn && inval_style != inval_ignore)
1632 exit_status = EXIT_CONVERSION_WARNINGS;
1634 return exit_status;