1 /* Reformat numbers like 11505426432 to the more human-readable 11G
2 Copyright (C) 2012-2024 Free Software Foundation, Inc.
4 This program is free software: you can redistribute it and/or modify
5 it under the terms of the GNU General Public License as published by
6 the Free Software Foundation, either version 3 of the License, or
7 (at your option) any later version.
9 This program is distributed in the hope that it will be useful,
10 but WITHOUT ANY WARRANTY; without even the implied warranty of
11 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 GNU General Public License for more details.
14 You should have received a copy of the GNU General Public License
15 along with this program. If not, see <https://www.gnu.org/licenses/>. */
22 #include <sys/types.h>
29 #include "skipchars.h"
33 #include "set-fields.h"
39 /* The official name of this program (e.g., no 'g' prefix). */
40 #define PROGRAM_NAME "numfmt"
42 #define AUTHORS proper_name ("Assaf Gordon")
44 /* Exit code when some numbers fail to convert. */
45 enum { EXIT_CONVERSION_WARNINGS
= 2 };
49 FROM_OPTION
= CHAR_MAX
+ 1,
67 scale_none
, /* the default: no scaling. */
68 scale_auto
, /* --from only. */
71 scale_IEC_I
/* 'i' suffix is required. */
74 static char const *const scale_from_args
[] =
76 "none", "auto", "si", "iec", "iec-i", nullptr
79 static enum scale_type
const scale_from_types
[] =
81 scale_none
, scale_auto
, scale_SI
, scale_IEC
, scale_IEC_I
84 static char const *const scale_to_args
[] =
86 "none", "si", "iec", "iec-i", nullptr
89 static enum scale_type
const scale_to_types
[] =
91 scale_none
, scale_SI
, scale_IEC
, scale_IEC_I
104 static char const *const round_args
[] =
106 "up", "down", "from-zero", "towards-zero", "nearest", nullptr
109 static enum round_type
const round_types
[] =
111 round_ceiling
, round_floor
, round_from_zero
, round_to_zero
, round_nearest
123 static char const *const inval_args
[] =
125 "abort", "fail", "warn", "ignore", nullptr
128 static enum inval_type
const inval_types
[] =
130 inval_abort
, inval_fail
, inval_warn
, inval_ignore
133 static struct option
const longopts
[] =
135 {"from", required_argument
, nullptr, FROM_OPTION
},
136 {"from-unit", required_argument
, nullptr, FROM_UNIT_OPTION
},
137 {"to", required_argument
, nullptr, TO_OPTION
},
138 {"to-unit", required_argument
, nullptr, TO_UNIT_OPTION
},
139 {"round", required_argument
, nullptr, ROUND_OPTION
},
140 {"padding", required_argument
, nullptr, PADDING_OPTION
},
141 {"suffix", required_argument
, nullptr, SUFFIX_OPTION
},
142 {"grouping", no_argument
, nullptr, GROUPING_OPTION
},
143 {"delimiter", required_argument
, nullptr, 'd'},
144 {"field", required_argument
, nullptr, FIELD_OPTION
},
145 {"debug", no_argument
, nullptr, DEBUG_OPTION
},
146 {"-debug", no_argument
, nullptr, DEV_DEBUG_OPTION
},
147 {"header", optional_argument
, nullptr, HEADER_OPTION
},
148 {"format", required_argument
, nullptr, FORMAT_OPTION
},
149 {"invalid", required_argument
, nullptr, INVALID_OPTION
},
150 {"zero-terminated", no_argument
, nullptr, 'z'},
151 {GETOPT_HELP_OPTION_DECL
},
152 {GETOPT_VERSION_OPTION_DECL
},
153 {nullptr, 0, nullptr, 0}
156 /* If delimiter has this value, blanks separate fields. */
157 enum { DELIMITER_DEFAULT
= CHAR_MAX
+ 1 };
159 /* Maximum number of digits we can safely handle
160 without precision loss, if scaling is 'none'. */
161 enum { MAX_UNSCALED_DIGITS
= LDBL_DIG
};
163 /* Maximum number of digits we can work with.
164 This is equivalent to 999Q.
165 NOTE: 'long double' can handle more than that, but there's
166 no official suffix assigned beyond Quetta (1000^10). */
167 enum { MAX_ACCEPTABLE_DIGITS
= 33 };
/* Scaling accepted when parsing input numbers; set from --from. */
169 static enum scale_type scale_from
= scale_none
;
/* Scaling used when formatting output numbers; passed to double_to_human.
   NOTE(review): presumably set from --to; the assignment itself is not
   visible in this view. */
170 static enum scale_type scale_to
= scale_none
;
/* Rounding method handed to double_to_human; set from --round. */
171 static enum round_type round_style
= round_from_zero
;
/* How conversion failures are treated; set from --invalid.
   Diagnostics are suppressed when this is inval_ignore. */
172 static enum inval_type inval_style
= inval_abort
;
/* If non-null, text appended to every formatted number, and trimmed from
   the end of an input number when it is present there. */
173 static char const *suffix
= nullptr;
/* Input unit size (--from-unit); parsed values are multiplied by it. */
174 static uintmax_t from_unit_size
= 1;
/* Output unit size (--to-unit); parsed values are divided by it. */
175 static uintmax_t to_unit_size
= 1;
/* Non-zero when locale-defined digit grouping is requested. */
176 static int grouping
= 0;
/* Buffer that receives the formatted number from double_to_human,
   and its allocated size.  It is reallocated with xpalloc. */
177 static char *padding_buffer
= nullptr;
178 static idx_t padding_buffer_size
= 0;
/* Requested output width (--padding or a --format width).  A negative
   value left-aligns; zero means no explicit padding was requested. */
179 static intmax_t padding_width
= 0;
/* Zero-padded field width taken from the --format directive; when
   non-zero, double_to_human inserts it into its printf format. */
180 static int zero_padding_width
= 0;
/* Precision taken from the --format directive, or -1 if none was given. */
181 static long int user_precision
= -1;
/* The --format argument, or null if the option was not used. */
182 static char const *format_str
= nullptr;
/* Literal text before and after the % directive of the format string;
   set by parse_format_string and printed around each number. */
183 static char *format_str_prefix
= nullptr;
184 static char *format_str_suffix
= nullptr;
186 /* By default, any conversion error will terminate the program. */
187 static int conv_exit_code
= EXIT_CONVERSION_WARNINGS
;
190 /* auto-pad each line based on skipped whitespace. */
191 static int auto_padding
= 0;
193 /* Field delimiter; DELIMITER_DEFAULT means blanks separate fields. */
194 static int delimiter
= DELIMITER_DEFAULT
;
196 /* line delimiter. */
197 static unsigned char line_delim
= '\n';
199 /* If non-zero, the first 'header' lines from STDIN are printed without being converted. */
200 static uintmax_t header
= 0;
202 /* Debug for users: print warnings to STDERR about possible
203 errors (similar to sort's debug). */
206 /* will be set according to the current locale. */
207 static char const *decimal_point
;
208 static int decimal_point_length
;
210 /* debugging for developers. Enables devmsg(). */
211 static bool dev_debug
= false;
215 default_scale_base (enum scale_type scale
)
/* Suffix letters recognized on input.  valid_suffixes points one byte
   past the leading '0', so it holds only the letters; valid_suffix
   searches that letters-only string.  unit_to_umax passes one of the
   two strings to xstrtoumax as the set of accepted suffixes. */
231 static char const zero_and_valid_suffixes
[] = "0KkMGTPEZYRQ";
232 static char const *valid_suffixes
= 1 + zero_and_valid_suffixes
;
235 valid_suffix (const char suf
)
237 return strchr (valid_suffixes
, suf
) != nullptr;
241 suffix_power (const char suf
)
245 case 'k': /* kilo. */
246 case 'K': /* kilo or kibi. */
249 case 'M': /* mega or mebi. */
252 case 'G': /* giga or gibi. */
255 case 'T': /* tera or tebi. */
258 case 'P': /* peta or pebi. */
261 case 'E': /* exa or exbi. */
264 case 'Z': /* zetta or 2**70. */
267 case 'Y': /* yotta or 2**80. */
270 case 'R': /* ronna or 2**90. */
273 case 'Q': /* quetta or 2**100. */
276 default: /* should never happen. assert? */
281 static inline char const *
282 suffix_power_char (int power
)
324 /* Similar to 'powl(3)' but without requiring 'libm'. */
326 powerld (long double base
, int x
)
328 long double result
= base
;
330 return 1; /* note for test coverage: this is never
331 reached, as 'powerld' won't be called if
332 there's no suffix, hence, no "power". */
334 /* TODO: check for overflow, inf? */
340 /* Similar to 'fabs(3)' but without requiring 'libm'. */
341 static inline long double
342 absld (long double val
)
344 return val
< 0 ? -val
: val
;
347 /* Scale down 'val', returns 'updated val' and 'x', such that
348 val*base^X = original val
349 Similar to "frexpl(3)" but without requiring 'libm',
350 allowing only integer scale, limited functionality and error checking. */
352 expld (long double val
, int base
, int /*output */ *x
)
356 if (val
>= -LDBL_MAX
&& val
<= LDBL_MAX
)
358 while (absld (val
) >= base
)
369 /* EXTREMELY limited 'ceil' - without 'libm'.
370 Assumes values that fit in intmax_t. */
371 static inline intmax_t
372 simple_round_ceiling (long double val
)
374 intmax_t intval
= val
;
380 /* EXTREMELY limited 'floor' - without 'libm'.
381 Assumes values that fit in intmax_t. */
382 static inline intmax_t
383 simple_round_floor (long double val
)
385 return -simple_round_ceiling (-val
);
388 /* EXTREMELY limited 'round away from zero'.
389 Assumes values that fit in intmax_t. */
390 static inline intmax_t
391 simple_round_from_zero (long double val
)
393 return val
< 0 ? simple_round_floor (val
) : simple_round_ceiling (val
);
396 /* EXTREMELY limited 'round towards zero'.
397 Assumes values that fit in intmax_t. */
398 static inline intmax_t
399 simple_round_to_zero (long double val
)
404 /* EXTREMELY limited 'round' - without 'libm'.
405 Assumes values that fit in intmax_t. */
406 static inline intmax_t
407 simple_round_nearest (long double val
)
409 return val
< 0 ? val
- 0.5 : val
+ 0.5;
413 static inline long double
414 simple_round (long double val
, enum round_type t
)
417 intmax_t intmax_mul
= val
/ INTMAX_MAX
;
418 val
-= (long double) INTMAX_MAX
* intmax_mul
;
423 rval
= simple_round_ceiling (val
);
427 rval
= simple_round_floor (val
);
430 case round_from_zero
:
431 rval
= simple_round_from_zero (val
);
435 rval
= simple_round_to_zero (val
);
439 rval
= simple_round_nearest (val
);
443 /* to silence the compiler - this should never happen. */
447 return (long double) INTMAX_MAX
* intmax_mul
+ rval
;
450 enum simple_strtod_error
453 SSE_OK_PRECISION_LOSS
,
457 /* the following are returned by 'simple_strtod_human'. */
458 SSE_VALID_BUT_FORBIDDEN_SUFFIX
,
463 /* Read an *integer* INPUT_STR,
464 but return the integer value in a 'long double' VALUE
465 hence, no UINTMAX_MAX limitation.
466 NEGATIVE is updated, and is stored separately from the VALUE
467 so that signbit() isn't required to determine the sign of -0.
468 ENDPTR is required (unlike strtod) and is used to store a pointer
469 to the character after the last character used in the conversion.
471 Note locale'd grouping is not supported,
472 nor is skipping of white-space supported.
475 SSE_OK - valid number.
476 SSE_OK_PRECISION_LOSS - if more than LDBL_DIG digits were used.
477 SSE_OVERFLOW - if more than 33 digits (999Q) were used.
478 SSE_INVALID_NUMBER - if no digits were found. */
479 static enum simple_strtod_error
480 simple_strtod_int (char const *input_str
,
481 char **endptr
, long double *value
, bool *negative
)
483 enum simple_strtod_error e
= SSE_OK
;
487 bool found_digit
= false;
489 if (*input_str
== '-')
497 *endptr
= (char *) input_str
;
498 while (c_isdigit (**endptr
))
500 int digit
= (**endptr
) - '0';
507 if (digits
> MAX_UNSCALED_DIGITS
)
508 e
= SSE_OK_PRECISION_LOSS
;
510 if (digits
> MAX_ACCEPTABLE_DIGITS
)
519 && ! STREQ_LEN (*endptr
, decimal_point
, decimal_point_length
))
520 return SSE_INVALID_NUMBER
;
530 /* Read a floating-point INPUT_STR represented as "NNNN[.NNNNN]",
531 and return the value in a 'long double' VALUE.
532 ENDPTR is required (unlike strtod) and is used to store a pointer
533 to the character after the last character used in the conversion.
534 PRECISION is optional and used to indicate fractions are present.
536 Note locale'd grouping is not supported,
537 nor is skipping of white-space supported.
540 SSE_OK - valid number.
541 SSE_OK_PRECISION_LOSS - if more than LDBL_DIG digits were used.
542 SSE_OVERFLOW - if more than 33 digits (999Q) were used.
543 SSE_INVALID_NUMBER - if no digits were found. */
544 static enum simple_strtod_error
545 simple_strtod_float (char const *input_str
,
551 enum simple_strtod_error e
= SSE_OK
;
556 /* TODO: accept locale'd grouped values for the integral part. */
557 e
= simple_strtod_int (input_str
, endptr
, value
, &negative
);
558 if (e
!= SSE_OK
&& e
!= SSE_OK_PRECISION_LOSS
)
561 /* optional decimal point + fraction. */
562 if (STREQ_LEN (*endptr
, decimal_point
, decimal_point_length
))
565 long double val_frac
= 0;
568 (*endptr
) += decimal_point_length
;
569 enum simple_strtod_error e2
=
570 simple_strtod_int (*endptr
, &ptr2
, &val_frac
, &neg_frac
);
571 if (e2
!= SSE_OK
&& e2
!= SSE_OK_PRECISION_LOSS
)
573 if (e2
== SSE_OK_PRECISION_LOSS
)
574 e
= e2
; /* propagate warning. */
576 return SSE_INVALID_NUMBER
;
578 /* number of digits in the fractions. */
579 size_t exponent
= ptr2
- *endptr
;
581 val_frac
= ((long double) val_frac
) / powerld (10, exponent
);
583 /* TODO: detect loss of precision (only really 18 digits
584 of precision across all digits (before and after '.')). */
594 *precision
= exponent
;
601 /* Read a 'human' INPUT_STR represented as "NNNN[.NNNNN] + suffix",
602 and return the value in a 'long double' VALUE,
603 with the precision of the input returned in PRECISION.
604 ENDPTR is required (unlike strtod) and is used to store a pointer
605 to the character after the last character used in the conversion.
606 ALLOWED_SCALING determines the scaling supported.
609 support locale'd grouping
610 accept scientific and hex floats (probably use strtold directly)
613 SSE_OK - valid number.
614 SSE_OK_PRECISION_LOSS - if more than LDBL_DIG digits were used.
615 SSE_OVERFLOW - if more than 33 digits (999Q) were used.
616 SSE_INVALID_NUMBER - if no digits were found.
617 SSE_VALID_BUT_FORBIDDEN_SUFFIX
619 SSE_MISSING_I_SUFFIX */
620 static enum simple_strtod_error
621 simple_strtod_human (char const *input_str
,
622 char **endptr
, long double *value
, size_t *precision
,
623 enum scale_type allowed_scaling
)
626 /* 'scale_auto' is checked below. */
627 int scale_base
= default_scale_base (allowed_scaling
);
629 devmsg ("simple_strtod_human:\n input string: %s\n"
630 " locale decimal-point: %s\n"
631 " MAX_UNSCALED_DIGITS: %d\n",
632 quote_n (0, input_str
),
633 quote_n (1, decimal_point
),
634 MAX_UNSCALED_DIGITS
);
636 enum simple_strtod_error e
=
637 simple_strtod_float (input_str
, endptr
, value
, precision
);
638 if (e
!= SSE_OK
&& e
!= SSE_OK_PRECISION_LOSS
)
641 devmsg (" parsed numeric value: %Lf\n"
642 " input precision = %d\n", *value
, (int)*precision
);
644 if (**endptr
!= '\0')
646 /* process suffix. */
648 /* Skip any blanks between the number and suffix. */
649 while (isblank (to_uchar (**endptr
)))
652 if (!valid_suffix (**endptr
))
653 return SSE_INVALID_SUFFIX
;
655 if (allowed_scaling
== scale_none
)
656 return SSE_VALID_BUT_FORBIDDEN_SUFFIX
;
658 power
= suffix_power (**endptr
);
659 (*endptr
)++; /* skip first suffix character. */
661 if (allowed_scaling
== scale_auto
&& **endptr
== 'i')
663 /* auto-scaling enabled, and the first suffix character
664 is followed by an 'i' (e.g. Ki, Mi, Gi). */
666 (*endptr
)++; /* skip second ('i') suffix character. */
667 devmsg (" Auto-scaling, found 'i', switching to base %d\n",
671 *precision
= 0; /* Reset, to select precision based on scale. */
674 if (allowed_scaling
== scale_IEC_I
)
679 return SSE_MISSING_I_SUFFIX
;
682 long double multiplier
= powerld (scale_base
, power
);
684 devmsg (" suffix power=%d^%d = %Lf\n", scale_base
, power
, multiplier
);
686 /* TODO: detect loss of precision and overflows. */
687 (*value
) = (*value
) * multiplier
;
689 devmsg (" returning value: %Lf (%LG)\n", *value
, *value
);
696 simple_strtod_fatal (enum simple_strtod_error err
, char const *input_str
)
698 char const *msgid
= nullptr;
702 case SSE_OK_PRECISION_LOSS
:
704 /* should never happen - this function isn't called when OK. */
708 msgid
= N_("value too large to be converted: %s");
711 case SSE_INVALID_NUMBER
:
712 msgid
= N_("invalid number: %s");
715 case SSE_VALID_BUT_FORBIDDEN_SUFFIX
:
716 msgid
= N_("rejecting suffix in input: %s (consider using --from)");
719 case SSE_INVALID_SUFFIX
:
720 msgid
= N_("invalid suffix in input: %s");
723 case SSE_MISSING_I_SUFFIX
:
724 msgid
= N_("missing 'i' suffix in input: %s (e.g Ki/Mi/Gi)");
729 if (inval_style
!= inval_ignore
)
730 error (conv_exit_code
, 0, gettext (msgid
), quote (input_str
));
733 /* Convert VAL to a human format string using PRECISION in BUF of size
734 BUF_SIZE. Use SCALE, GROUP, and ROUND to format. Return
735 the number of bytes needed to represent VAL. If this number is not
736 less than BUF_SIZE, the buffer is too small; if it is negative, the
737 formatting failed for some reason. */
739 double_to_human (long double val
, int precision
,
740 char *buf
, idx_t buf_size
,
741 enum scale_type scale
, int group
, enum round_type round
)
743 char fmt
[sizeof "%'0.*Lfi%s%s%s" + INT_STRLEN_BOUND (zero_padding_width
)];
750 if (zero_padding_width
)
751 pfmt
+= sprintf (pfmt
, "0%d", zero_padding_width
);
753 devmsg ("double_to_human:\n");
755 if (scale
== scale_none
)
757 val
*= powerld (10, precision
);
758 val
= simple_round (val
, round
);
759 val
/= powerld (10, precision
);
762 " no scaling, returning (grouped) value: %'.*Lf\n" :
763 " no scaling, returning value: %.*Lf\n", precision
, val
);
765 strcpy (pfmt
, ".*Lf%s");
767 return snprintf (buf
, buf_size
, fmt
, precision
, val
,
768 suffix
? suffix
: "");
771 /* Scaling requested by user. */
772 double scale_base
= default_scale_base (scale
);
774 /* Normalize val to scale. */
776 val
= expld (val
, scale_base
, &power
);
777 devmsg (" scaled value to %Lf * %0.f ^ %d\n", val
, scale_base
, power
);
779 /* Perform rounding. */
780 int power_adjust
= 0;
781 if (user_precision
!= -1)
782 power_adjust
= MIN (power
* 3, user_precision
);
783 else if (absld (val
) < 10)
785 /* for values less than 10, we allow one decimal-point digit,
786 so adjust before rounding. */
790 val
*= powerld (10, power_adjust
);
791 val
= simple_round (val
, round
);
792 val
/= powerld (10, power_adjust
);
794 /* two special cases after rounding:
795 1. a "999.99" can turn into 1000 - so scale down
796 2. a "9.99" can turn into 10 - so don't display decimal-point. */
797 if (absld (val
) >= scale_base
)
803 /* should "7.0" be printed as "7" ?
804 if removing the ".0" is preferred, enable the fourth condition. */
805 int show_decimal_point
= (val
!= 0) && (absld (val
) < 10) && (power
> 0);
806 /* && (absld (val) > simple_round_floor (val))) */
808 devmsg (" after rounding, value=%Lf * %0.f ^ %d\n", val
, scale_base
, power
);
810 strcpy (pfmt
, ".*Lf%s%s%s");
812 int prec
= user_precision
== -1 ? show_decimal_point
: user_precision
;
814 return snprintf (buf
, buf_size
, fmt
, prec
, val
,
815 power
== 1 && scale
== scale_SI
816 ? "k" : suffix_power_char (power
),
817 &"i"[! (scale
== scale_IEC_I
&& 0 < power
)],
818 suffix
? suffix
: "");
821 /* Convert a string of decimal digits, N_STRING, with an optional suffix
822 to an integral value. Suffixes are handled as with --from=auto.
823 Upon successful conversion, return that value.
824 If it cannot be converted, give a diagnostic and exit. */
826 unit_to_umax (char const *n_string
)
829 char const *c_string
= n_string
;
830 char *t_string
= nullptr;
831 size_t n_len
= strlen (n_string
);
834 char const *suffixes
= valid_suffixes
;
836 /* Adjust suffixes so K=1000, Ki=1024, KiB=invalid. */
837 if (n_len
&& ! c_isdigit (n_string
[n_len
- 1]))
839 t_string
= xmalloc (n_len
+ 2);
840 end
= t_string
+ n_len
- 1;
841 memcpy (t_string
, n_string
, n_len
);
843 if (*end
== 'i' && 2 <= n_len
&& ! c_isdigit (*(end
- 1)))
849 suffixes
= zero_and_valid_suffixes
;
855 s_err
= xstrtoumax (c_string
, &end
, 10, &n
, suffixes
);
857 if (s_err
!= LONGINT_OK
|| *end
|| n
== 0)
860 error (EXIT_FAILURE
, 0, _("invalid unit size: %s"), quote (n_string
));
871 if (status
!= EXIT_SUCCESS
)
876 Usage: %s [OPTION]... [NUMBER]...\n\
879 Reformat NUMBER(s), or the numbers from standard input if none are specified.\n\
881 emit_mandatory_arg_note ();
883 --debug print warnings about invalid input\n\
886 -d, --delimiter=X use X instead of whitespace for field delimiter\n\
889 --field=FIELDS replace the numbers in these input fields (default=1);\n\
893 --format=FORMAT use printf style floating-point FORMAT;\n\
894 see FORMAT below for details\n\
897 --from=UNIT auto-scale input numbers to UNITs; default is 'none';\n\
901 --from-unit=N specify the input unit size (instead of the default 1)\n\
904 --grouping use locale-defined grouping of digits, e.g. 1,000,000\n\
905 (which means it has no effect in the C/POSIX locale)\n\
908 --header[=N] print (without converting) the first N header lines;\n\
909 N defaults to 1 if not specified\n\
912 --invalid=MODE failure mode for invalid numbers: MODE can be:\n\
913 abort (default), fail, warn, ignore\n\
916 --padding=N pad the output to N characters; positive N will\n\
917 right-align; negative N will left-align;\n\
918 padding is ignored if the output is wider than N;\n\
919 the default is to automatically pad if a whitespace\n\
923 --round=METHOD use METHOD for rounding when scaling; METHOD can be:\n\
924 up, down, from-zero (default), towards-zero, nearest\n\
927 --suffix=SUFFIX add SUFFIX to output numbers, and accept optional\n\
928 SUFFIX in input numbers\n\
931 --to=UNIT auto-scale output numbers to UNITs; see UNIT below\n\
934 --to-unit=N the output unit size (instead of the default 1)\n\
937 -z, --zero-terminated line delimiter is NUL, not newline\n\
939 fputs (HELP_OPTION_DESCRIPTION
, stdout
);
940 fputs (VERSION_OPTION_DESCRIPTION
, stdout
);
944 UNIT options:\n"), stdout
);
946 none no auto-scaling is done; suffixes will trigger an error\n\
949 auto accept optional single/two letter suffix:\n\
950 1K = 1000, 1k = 1000,\n\
953 1Mi = 1048576,\n"), stdout
);
955 si accept optional single letter suffix:\n\
956 1k = 1000, 1K = 1000,\n\
960 iec accept optional single letter suffix:\n\
961 1K = 1024, 1k = 1024,\n\
965 iec-i accept optional two-letter suffix:\n\
966 1Ki = 1024, 1ki = 1024,\n\
971 FIELDS supports cut(1) style field ranges:\n\
972 N N'th field, counted from 1\n\
973 N- from N'th field, to end of line\n\
974 N-M from N'th to M'th field (inclusive)\n\
975 -M from first to M'th field (inclusive)\n\
977 Multiple fields/ranges can be separated with commas\n\
981 FORMAT must be suitable for printing one floating-point argument '%f'.\n\
982 Optional quote (%'f) will enable --grouping (if supported by current locale).\n\
983 Optional width value (%10f) will pad output. Optional zero (%010f) width\n\
984 will zero pad the number. Optional negative values (%-10f) will left align.\n\
985 Optional precision (%.1f) will override the input determined precision.\n\
989 Exit status is 0 if all input numbers were successfully converted.\n\
990 By default, %s will stop at the first conversion error with exit status 2.\n\
991 With --invalid='fail' a warning is printed for each conversion error\n\
992 and the exit status is 2. With --invalid='warn' each conversion error is\n\
993 diagnosed, but the exit status is 0. With --invalid='ignore' conversion\n\
994 errors are not diagnosed and the exit status is 0.\n\
1001 $ %s --to=iec 2048\n\
1003 $ %s --to=iec-i 4096\n\
1005 $ echo 1K | %s --from=si\n\
1007 $ echo 1K | %s --from=iec\n\
1009 $ df -B1 | %s --header --field 2-4 --to=si\n\
1010 $ ls -l | %s --header --field 5 --to=iec\n\
1011 $ ls -lh | %s --header --field 5 --from=iec --padding=10\n\
1012 $ ls -lh | %s --header --field 5 --from=iec --format %%10f\n"),
1013 program_name
, program_name
, program_name
,
1014 program_name
, program_name
, program_name
,
1015 program_name
, program_name
, program_name
);
1016 emit_ancillary_info (PROGRAM_NAME
);
1021 /* Given 'fmt' (a printf(3) compatible format string), extracts the following:
1022 1. padding (e.g. %20f)
1023 2. alignment (e.g. %-20f)
1024 3. grouping (e.g. %'f)
1026 Only a limited subset of printf(3) syntax is supported.
1029 support %e %g etc. rather than just %f
1032 1. This function sets the global variables:
1033 padding_width, grouping,
1034 format_str_prefix, format_str_suffix
1035 2. The function aborts on any errors. */
1037 parse_format_string (char const *fmt
)
1040 size_t prefix_len
= 0;
1042 char *endptr
= nullptr;
1043 bool zero_padding
= false;
1045 for (i
= 0; !(fmt
[i
] == '%' && fmt
[i
+ 1] != '%'); i
+= (fmt
[i
] == '%') + 1)
1048 error (EXIT_FAILURE
, 0,
1049 _("format %s has no %% directive"), quote (fmt
));
1056 size_t skip
= strspn (fmt
+ i
, " ");
1063 else if (fmt
[i
] == '0')
1065 zero_padding
= true;
1072 intmax_t pad
= strtoimax (fmt
+ i
, &endptr
, 10);
1076 if (debug
&& padding_width
&& !(zero_padding
&& pad
> 0))
1077 error (0, 0, _("--format padding overriding --padding"));
1079 /* Set padding width and alignment. On overflow, set widths to
1080 large values that cause later code to avoid undefined behavior
1081 and fail at a reasonable point. */
1083 padding_width
= pad
;
1087 zero_padding_width
= MIN (pad
, INT_MAX
);
1089 padding_width
= pad
;
1095 error (EXIT_FAILURE
, 0, _("format %s ends in %%"), quote (fmt
));
1101 user_precision
= strtol (fmt
+ i
, &endptr
, 10);
1102 if (errno
== ERANGE
|| user_precision
< 0 || SIZE_MAX
< user_precision
1103 || isblank (fmt
[i
]) || fmt
[i
] == '+')
1105 /* Note we disallow negative user_precision to be
1106 consistent with printf(1). POSIX states that
1107 negative precision is only supported (and ignored)
1108 when used with '.*f'. glibc at least will malform
1109 output when passed a direct negative precision. */
1110 error (EXIT_FAILURE
, 0,
1111 _("invalid precision in format %s"), quote (fmt
));
1117 error (EXIT_FAILURE
, 0, _("invalid format %s,"
1118 " directive must be %%[0]['][-][N][.][N]f"),
1123 for (; fmt
[i
] != '\0'; i
+= (fmt
[i
] == '%') + 1)
1124 if (fmt
[i
] == '%' && fmt
[i
+ 1] != '%')
1125 error (EXIT_FAILURE
, 0, _("format %s has too many %% directives"),
1129 format_str_prefix
= ximemdup0 (fmt
, prefix_len
);
1130 if (fmt
[suffix_pos
] != '\0')
1131 format_str_suffix
= xstrdup (fmt
+ suffix_pos
);
1133 devmsg ("format String:\n input: %s\n grouping: %s\n"
1134 " padding width: %jd\n"
1135 " prefix: %s\n suffix: %s\n",
1136 quote_n (0, fmt
), (grouping
) ? "yes" : "no",
1138 quote_n (1, format_str_prefix
? format_str_prefix
: ""),
1139 quote_n (2, format_str_suffix
? format_str_suffix
: ""));
1142 /* Parse a numeric value (with optional suffix) from a string.
1143 Returns a long double value, with input precision.
1145 If there's an error converting the string to value - exits with
1148 If there are any trailing characters after the number
1149 (besides a valid suffix) - exits with an error. */
1150 static enum simple_strtod_error
1151 parse_human_number (char const *str
, long double /*output */ *value
,
1154 char *ptr
= nullptr;
1156 enum simple_strtod_error e
=
1157 simple_strtod_human (str
, &ptr
, value
, precision
, scale_from
);
1158 if (e
!= SSE_OK
&& e
!= SSE_OK_PRECISION_LOSS
)
1160 simple_strtod_fatal (e
, str
);
1164 if (ptr
&& *ptr
!= '\0')
1166 if (inval_style
!= inval_ignore
)
1167 error (conv_exit_code
, 0, _("invalid suffix in input %s: %s"),
1168 quote_n (0, str
), quote_n (1, ptr
));
1169 e
= SSE_INVALID_SUFFIX
;
1175 /* Format the given VAL into padding_buffer, using the requested representation,
1176 and compute the padding needed to align it; print_padded_number emits it. */
1178 prepare_padded_number (const long double val
, size_t precision
,
1181 /* Generate Output. */
1182 size_t precision_used
= user_precision
== -1 ? precision
: user_precision
;
1184 /* Can't reliably print too-large values without auto-scaling. */
1186 expld (val
, 10, &x
);
1188 if (scale_to
== scale_none
1189 && x
+ precision_used
> MAX_UNSCALED_DIGITS
)
1191 if (inval_style
!= inval_ignore
)
1194 error (conv_exit_code
, 0,
1195 _("value/precision too large to be printed: '%Lg/%zu'"
1196 " (consider using --to)"), val
, precision_used
);
1198 error (conv_exit_code
, 0,
1199 _("value too large to be printed: '%Lg'"
1200 " (consider using --to)"), val
);
1205 if (x
> MAX_ACCEPTABLE_DIGITS
- 1)
1207 if (inval_style
!= inval_ignore
)
1208 error (conv_exit_code
, 0, _("value too large to be printed: '%Lg'"
1209 " (cannot handle values > 999Q)"), val
);
1215 int numlen
= double_to_human (val
, precision_used
,
1216 padding_buffer
, padding_buffer_size
,
1217 scale_to
, grouping
, round_style
);
1219 if (numlen
< 0 || ckd_sub (&growth
, numlen
, padding_buffer_size
- 1))
1220 error (EXIT_FAILURE
, 0,
1221 _("failed to prepare value '%Lf' for printing"), val
);
1224 padding_buffer
= xpalloc (padding_buffer
, &padding_buffer_size
,
1228 devmsg ("formatting output:\n value: %Lf\n humanized: %s\n",
1229 val
, quote (padding_buffer
));
1234 int buf_width
= mbswidth (padding_buffer
,
1235 MBSW_REJECT_INVALID
| MBSW_REJECT_UNPRINTABLE
);
1238 if (padding_width
< 0)
1240 if (padding_width
< -buf_width
)
1241 pad
= padding_width
+ buf_width
;
1245 if (buf_width
< padding_width
)
1246 pad
= padding_width
- buf_width
;
1256 print_padded_number (intmax_t padding
)
1258 if (format_str_prefix
)
1259 fputs (format_str_prefix
, stdout
);
1261 for (intmax_t p
= padding
; 0 < p
; p
--)
1264 fputs (padding_buffer
, stdout
);
1266 for (intmax_t p
= padding
; p
< 0; p
++)
1269 if (format_str_suffix
)
1270 fputs (format_str_suffix
, stdout
);
1273 /* Converts the TEXT number string to the requested representation,
1274 and handles automatic suffix addition. */
1276 process_suffixed_number (char *text
, long double *result
,
1277 size_t *precision
, long int field
)
1279 if (suffix
&& strlen (text
) > strlen (suffix
))
1281 char *possible_suffix
= text
+ strlen (text
) - strlen (suffix
);
1283 if (STREQ (suffix
, possible_suffix
))
1285 /* trim suffix, ONLY if it's at the end of the text. */
1286 *possible_suffix
= '\0';
1287 devmsg ("trimming suffix %s\n", quote (suffix
));
1290 devmsg ("no valid suffix found\n");
1293 /* Skip white space - always. */
1295 while (*p
&& isblank (to_uchar (*p
)))
1298 /* setup auto-padding. */
1301 padding_width
= text
< p
|| 1 < field
? strlen (text
) : 0;
1302 devmsg ("setting Auto-Padding to %jd characters\n", padding_width
);
1305 long double val
= 0;
1306 enum simple_strtod_error e
= parse_human_number (p
, &val
, precision
);
1307 if (e
== SSE_OK_PRECISION_LOSS
&& debug
)
1308 error (0, 0, _("large input value %s: possible precision loss"),
1311 if (from_unit_size
!= 1 || to_unit_size
!= 1)
1312 val
= (val
* from_unit_size
) / to_unit_size
;
1316 return (e
== SSE_OK
|| e
== SSE_OK_PRECISION_LOSS
);
1320 newline_or_blank (mcel_t g
)
1322 return g
.ch
== '\n' || c32isblank (g
.ch
);
1325 /* Return a pointer to the beginning of the next field in line.
1326 The line pointer is moved to the end of the next field. */
1328 next_field (char **line
)
1330 char *field_start
= *line
;
1331 char *field_end
= field_start
;
1333 if (delimiter
!= DELIMITER_DEFAULT
)
1335 if (*field_start
!= delimiter
)
1337 while (*field_end
&& *field_end
!= delimiter
)
1340 /* else empty field */
1344 /* keep any space prefix in the returned field */
1345 field_end
= skip_str_matching (field_end
, newline_or_blank
, true);
1346 field_end
= skip_str_matching (field_end
, newline_or_blank
, false);
1355 include_field (uintmax_t field
)
1357 struct field_range_pair
*p
= frp
;
1361 while (p
->lo
!= UINTMAX_MAX
)
1363 if (p
->lo
<= field
&& p
->hi
>= field
)
1370 /* Convert and output the given field. If it is not included in the set
1371 of fields to process just output the original */
1373 process_field (char *text
, uintmax_t field
)
1375 long double val
= 0;
1376 size_t precision
= 0;
1377 bool valid_number
= true;
1379 if (include_field (field
))
1382 process_suffixed_number (text
, &val
, &precision
, field
);
1386 valid_number
= prepare_padded_number (val
, precision
, &padding
);
1389 print_padded_number (padding
);
1391 fputs (text
, stdout
);
1394 fputs (text
, stdout
);
1396 return valid_number
;
1399 /* Convert number in a given line of text.
1400 NEWLINE specifies whether to output a '\n' for this "line". */
1402 process_line (char *line
, bool newline
)
1405 uintmax_t field
= 0;
1406 bool valid_number
= true;
1410 next
= next_field (&line
);
1414 /* nul terminate the current field string and process */
1417 if (! process_field (next
, field
))
1418 valid_number
= false;
1420 fputc ((delimiter
== DELIMITER_DEFAULT
) ?
1421 ' ' : delimiter
, stdout
);
1426 /* end of the line, process the last field and finish */
1427 if (! process_field (next
, field
))
1428 valid_number
= false;
1435 putchar (line_delim
);
1437 return valid_number
;
1441 main (int argc
, char **argv
)
1443 int valid_numbers
= 1;
1446 initialize_main (&argc
, &argv
);
1447 set_program_name (argv
[0]);
1448 locale_ok
= !!setlocale (LC_ALL
, "");
1449 bindtextdomain (PACKAGE
, LOCALEDIR
);
1450 textdomain (PACKAGE
);
1453 /* Enable extended precision if needed. */
1457 decimal_point
= nl_langinfo (RADIXCHAR
);
1458 if (decimal_point
== nullptr || strlen (decimal_point
) == 0)
1459 decimal_point
= ".";
1460 decimal_point_length
= strlen (decimal_point
);
1462 atexit (close_stdout
);
1466 int c
= getopt_long (argc
, argv
, "d:z", longopts
, nullptr);
1474 scale_from
= XARGMATCH ("--from", optarg
,
1475 scale_from_args
, scale_from_types
);
1478 case FROM_UNIT_OPTION
:
1479 from_unit_size
= unit_to_umax (optarg
);
1484 XARGMATCH ("--to", optarg
, scale_to_args
, scale_to_types
);
1487 case TO_UNIT_OPTION
:
1488 to_unit_size
= unit_to_umax (optarg
);
1492 round_style
= XARGMATCH ("--round", optarg
, round_args
, round_types
);
1495 case GROUPING_OPTION
:
1499 case PADDING_OPTION
:
1500 if (((xstrtoimax (optarg
, nullptr, 10, &padding_width
, "")
1501 & ~LONGINT_OVERFLOW
)
1503 || padding_width
== 0)
1504 error (EXIT_FAILURE
, 0, _("invalid padding value %s"),
1506 /* TODO: We probably want to apply a specific --padding
1507 to --header lines too. */
1512 error (EXIT_FAILURE
, 0, _("multiple field specifications"));
1513 set_fields (optarg
, SETFLD_ALLOW_DASH
);
1517 /* Interpret -d '' to mean 'use the NUL byte as the delimiter.' */
1518 if (optarg
[0] != '\0' && optarg
[1] != '\0')
1519 error (EXIT_FAILURE
, 0,
1520 _("the delimiter must be a single character"));
1521 delimiter
= optarg
[0];
1536 case DEV_DEBUG_OPTION
:
1544 if (xstrtoumax (optarg
, nullptr, 10, &header
, "") != LONGINT_OK
1546 error (EXIT_FAILURE
, 0, _("invalid header value %s"),
1556 format_str
= optarg
;
1559 case INVALID_OPTION
:
1560 inval_style
= XARGMATCH ("--invalid", optarg
,
1561 inval_args
, inval_types
);
1564 case_GETOPT_HELP_CHAR
;
1565 case_GETOPT_VERSION_CHAR (PROGRAM_NAME
, AUTHORS
);
1568 usage (EXIT_FAILURE
);
1572 if (format_str
!= nullptr && grouping
)
1573 error (EXIT_FAILURE
, 0, _("--grouping cannot be combined with --format"));
1575 if (debug
&& ! locale_ok
)
1576 error (0, 0, _("failed to set locale"));
1578 /* Warn about no-op. */
1579 if (debug
&& scale_from
== scale_none
&& scale_to
== scale_none
1580 && !grouping
&& (padding_width
== 0) && (format_str
== nullptr))
1581 error (0, 0, _("no conversion option specified"));
1584 parse_format_string (format_str
);
1588 if (scale_to
!= scale_none
)
1589 error (EXIT_FAILURE
, 0, _("grouping cannot be combined with --to"));
1590 if (debug
&& (strlen (nl_langinfo (THOUSEP
)) == 0))
1591 error (0, 0, _("grouping has no effect in this locale"));
1594 auto_padding
= (padding_width
== 0 && delimiter
== DELIMITER_DEFAULT
);
1596 if (inval_style
!= inval_abort
)
1601 if (debug
&& header
)
1602 error (0, 0, _("--header ignored with command-line input"));
1604 for (; optind
< argc
; optind
++)
1605 valid_numbers
&= process_line (argv
[optind
], true);
1609 char *line
= nullptr;
1610 size_t line_allocated
= 0;
1613 while (header
-- && getdelim (&line
, &line_allocated
,
1614 line_delim
, stdin
) > 0)
1615 fputs (line
, stdout
);
1617 while ((len
= getdelim (&line
, &line_allocated
,
1618 line_delim
, stdin
)) > 0)
1620 bool newline
= line
[len
- 1] == line_delim
;
1622 line
[len
- 1] = '\0';
1623 valid_numbers
&= process_line (line
, newline
);
1627 error (EXIT_FAILURE
, errno
, _("error reading input"));
1630 if (debug
&& !valid_numbers
)
1631 error (0, 0, _("failed to convert some of the input numbers"));
1633 int exit_status
= EXIT_SUCCESS
;
1635 && inval_style
!= inval_warn
&& inval_style
!= inval_ignore
)
1636 exit_status
= EXIT_CONVERSION_WARNINGS
;
1638 main_exit (exit_status
);