1 /* unexpand - convert blanks to tabs
2 Copyright (C) 1989-2015 Free Software Foundation, Inc.
4 This program is free software: you can redistribute it and/or modify
5 it under the terms of the GNU General Public License as published by
6 the Free Software Foundation, either version 3 of the License, or
7 (at your option) any later version.
9 This program is distributed in the hope that it will be useful,
10 but WITHOUT ANY WARRANTY; without even the implied warranty of
11 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 GNU General Public License for more details.
14 You should have received a copy of the GNU General Public License
15 along with this program. If not, see <http://www.gnu.org/licenses/>. */
17 /* By default, convert only maximal strings of initial blanks and tabs into tabs.
19 Preserves backspace characters in the output; they decrement the
20 column count for tab calculations.
21 The default action is equivalent to -8.
24 --tabs=tab1[,tab2[,...]]
26 -tab1[,tab2[,...]] If only one tab stop is given, set the tabs tab1
27 columns apart instead of the default 8. Otherwise,
28 set the tabs at columns tab1, tab2, etc. (numbered from
29 0); preserve any blanks beyond the tab stops given.
31 -a Use tabs wherever they would replace 2 or more blanks,
32 not just at the beginnings of lines.
34 David MacKenzie <djm@gnu.ai.mit.edu> */
40 #include <sys/types.h>
47 /* The official name of this program (e.g., no 'g' prefix). */
48 #define PROGRAM_NAME "unexpand"
50 #define AUTHORS proper_name ("David MacKenzie")
52 /* If true, convert blanks even after nonblank characters have been read on the line. */
54 static bool convert_entire_line
;
56 /* If nonzero, the size of all tab stops. If zero, use 'tab_list' instead. */
57 static size_t tab_size
;
59 /* The maximum distance between tab stops. */
60 static size_t max_column_width
;
62 /* Array of the explicit column numbers of the tab stops;
63 after 'tab_list' is exhausted, the rest of the line is printed
64 unchanged. The first column is column 0. */
65 static uintmax_t *tab_list
;
67 /* The number of allocated entries in 'tab_list'. */
68 static size_t n_tabs_allocated
;
70 /* The index of the first invalid element of 'tab_list',
71 where the next element can be added. */
72 static size_t first_free_tab
;
74 /* Null-terminated array of input filenames. */
75 static char **file_list
;
77 /* Default for 'file_list' if no files are given on the command line. */
78 static char *stdin_argv
[] =
83 /* True if we have ever read standard input. */
84 static bool have_read_stdin
;
86 /* The desired exit status. */
87 static int exit_status
;
89 /* For long options that have no equivalent short option, use a
90 non-character as a pseudo short option, starting with CHAR_MAX + 1. */
93 CONVERT_FIRST_ONLY_OPTION
= CHAR_MAX
+ 1
96 static struct option
const longopts
[] =
98 {"tabs", required_argument
, NULL
, 't'},
99 {"all", no_argument
, NULL
, 'a'},
100 {"first-only", no_argument
, NULL
, CONVERT_FIRST_ONLY_OPTION
},
101 {GETOPT_HELP_OPTION_DECL
},
102 {GETOPT_VERSION_OPTION_DECL
},
109 if (status
!= EXIT_SUCCESS
)
114 Usage: %s [OPTION]... [FILE]...\n\
118 Convert blanks in each FILE to tabs, writing to standard output.\n\
122 emit_mandatory_arg_note ();
125 -a, --all convert all blanks, instead of just initial blanks\n\
126 --first-only convert only leading sequences of blanks (overrides -a)\n\
127 -t, --tabs=N have tabs N characters apart instead of 8 (enables -a)\n\
128 -t, --tabs=LIST use comma separated LIST of tab positions (enables -a)\n\
130 fputs (HELP_OPTION_DESCRIPTION
, stdout
);
131 fputs (VERSION_OPTION_DESCRIPTION
, stdout
);
132 emit_ancillary_info (PROGRAM_NAME
);
137 /* Add tab stop TABVAL to the end of 'tab_list'. */
/* Also raises 'max_column_width' when the gap between TABVAL and the
   previously stored stop is the widest seen so far.  The ordering of
   the stops is not checked here: a TABVAL below the previous stop is
   still stored, with a gap of 0. */
140 add_tab_stop (uintmax_t tabval
)
/* The most recently stored stop, or 0 while the list is still empty. */
142 uintmax_t prev_column
= first_free_tab
? tab_list
[first_free_tab
- 1] : 0;
/* Distance from the previous stop; forced to 0 when TABVAL is below
   it, so the unsigned subtraction never wraps. */
143 uintmax_t column_width
= prev_column
<= tabval
? tabval
- prev_column
: 0;
/* The list is full.  X2NREALLOC is defined elsewhere; it is handed the
   array and its allocation count, presumably to enlarge both --
   confirm against its definition. */
145 if (first_free_tab
== n_tabs_allocated
)
146 tab_list
= X2NREALLOC (tab_list
, &n_tabs_allocated
);
147 tab_list
[first_free_tab
++] = tabval
;
/* Track the widest gap; 'max_column_width' is later used as the
   xmalloc size of the pending-blank buffer. */
149 if (max_column_width
< column_width
)
/* 'max_column_width' is a size_t while the gap is a uintmax_t, so
   reject a gap that would not fit. */
151 if (SIZE_MAX
< column_width
)
152 error (EXIT_FAILURE
, 0, _("tabs are too far apart"));
153 max_column_width
= column_width
;
157 /* Add the comma or blank separated list of tab stops STOPS
158 to the list of tab stops. */
161 parse_tab_stops (char const *stops
)
163 bool have_tabval
= false;
164 uintmax_t tabval
IF_LINT ( = 0);
165 char const *num_start
IF_LINT ( = NULL
);
168 for (; *stops
; stops
++)
170 if (*stops
== ',' || isblank (to_uchar (*stops
)))
173 add_tab_stop (tabval
);
176 else if (ISDIGIT (*stops
))
185 /* Detect overflow. */
186 if (!DECIMAL_DIGIT_ACCUMULATE (tabval
, *stops
- '0', uintmax_t))
188 size_t len
= strspn (num_start
, "0123456789");
189 char *bad_num
= xstrndup (num_start
, len
);
190 error (0, 0, _("tab stop is too large %s"), quote (bad_num
));
193 stops
= num_start
+ len
- 1;
198 error (0, 0, _("tab size contains invalid character(s): %s"),
209 add_tab_stop (tabval
);
212 /* Check that the list of tab stops TABS, with ENTRIES entries,
213 contains only nonzero, ascending values. */
216 validate_tab_stops (uintmax_t const *tabs
, size_t entries
)
218 uintmax_t prev_tab
= 0;
221 for (i
= 0; i
< entries
; i
++)
224 error (EXIT_FAILURE
, 0, _("tab size cannot be 0"));
225 if (tabs
[i
] <= prev_tab
)
226 error (EXIT_FAILURE
, 0, _("tab sizes must be ascending"));
231 /* Close the old stream pointer FP if it is non-NULL,
232 and return a new one opened to read the next input file.
233 Open a filename of '-' as the standard input.
234 Return NULL if there are no more input files. */
239 static char *prev_file
;
246 error (0, errno
, "%s", quotef (prev_file
));
247 exit_status
= EXIT_FAILURE
;
249 if (STREQ (prev_file
, "-"))
250 clearerr (fp
); /* Also clear EOF. */
251 else if (fclose (fp
) != 0)
253 error (0, errno
, "%s", quotef (prev_file
));
254 exit_status
= EXIT_FAILURE
;
258 while ((file
= *file_list
++) != NULL
)
260 if (STREQ (file
, "-"))
262 have_read_stdin
= true;
266 fp
= fopen (file
, "r");
270 fadvise (fp
, FADVISE_SEQUENTIAL
);
273 error (0, errno
, "%s", quotef (file
));
274 exit_status
= EXIT_FAILURE
;
279 /* Change blanks to tabs, writing to stdout.
280 Read each file in 'file_list', in order. */
286 FILE *fp
= next_file (NULL
);
288 /* The array of pending blanks. In non-POSIX locales, blanks can
289 include characters other than spaces, so the blanks must be
290 stored, not merely counted. */
296 /* The worst case is a non-blank character, then one blank, then a
297 tab stop, then MAX_COLUMN_WIDTH - 1 blanks, then a non-blank; so
298 allocate MAX_COLUMN_WIDTH bytes to store the blanks. */
299 pending_blank
= xmalloc (max_column_width
);
303 /* Input character, or EOF. */
306 /* If true, perform translations. */
310 /* The following variables have valid values only when CONVERT is true: */
313 /* Column of next input character. */
314 uintmax_t column
= 0;
316 /* Column the next input tab stop is on. */
317 uintmax_t next_tab_column
= 0;
319 /* Index in TAB_LIST of next tab stop to examine. */
320 size_t tab_index
= 0;
322 /* If true, the first pending blank came just before a tab stop. */
323 bool one_blank_before_tab_stop
= false;
325 /* If true, the previous input character was a blank. This is
326 initially true, since initial strings of blanks are treated
327 as if the line was preceded by a blank. */
328 bool prev_blank
= true;
330 /* Number of pending columns of blanks. */
334 /* Convert a line of text. */
338 while ((c
= getc (fp
)) < 0 && (fp
= next_file (fp
)))
343 bool blank
= !! isblank (c
);
347 if (next_tab_column
<= column
)
351 column
+ (tab_size
- column
% tab_size
);
354 if (tab_index
== first_free_tab
)
361 uintmax_t tab
= tab_list
[tab_index
++];
364 next_tab_column
= tab
;
372 if (next_tab_column
< column
)
373 error (EXIT_FAILURE
, 0, _("input line is too long"));
377 column
= next_tab_column
;
380 pending_blank
[0] = '\t';
386 if (! (prev_blank
&& column
== next_tab_column
))
388 /* It is not yet known whether the pending blanks
389 will be replaced by tabs. */
390 if (column
== next_tab_column
)
391 one_blank_before_tab_stop
= true;
392 pending_blank
[pending
++] = c
;
397 /* Replace the pending blanks by a tab or two. */
398 pending_blank
[0] = c
= '\t';
401 /* Discard pending blanks, unless it was a single
402 blank just before the previous tab stop. */
403 pending
= one_blank_before_tab_stop
;
408 /* Go back one column, and force recalculation of the next tab stop. */
411 next_tab_column
= column
;
412 tab_index
-= !!tab_index
;
418 error (EXIT_FAILURE
, 0, _("input line is too long"));
423 if (pending
> 1 && one_blank_before_tab_stop
)
424 pending_blank
[0] = '\t';
425 if (fwrite (pending_blank
, 1, pending
, stdout
) != pending
)
426 error (EXIT_FAILURE
, errno
, _("write error"));
428 one_blank_before_tab_stop
= false;
432 convert
&= convert_entire_line
|| blank
;
437 free (pending_blank
);
442 error (EXIT_FAILURE
, errno
, _("write error"));
449 main (int argc
, char **argv
)
451 bool have_tabval
= false;
452 uintmax_t tabval
IF_LINT ( = 0);
455 /* If true, cancel the effect of any -a (explicit or implicit in -t),
456 so that only leading blanks will be considered. */
457 bool convert_first_only
= false;
459 initialize_main (&argc
, &argv
);
460 set_program_name (argv
[0]);
461 setlocale (LC_ALL
, "");
462 bindtextdomain (PACKAGE
, LOCALEDIR
);
463 textdomain (PACKAGE
);
465 atexit (close_stdout
);
467 have_read_stdin
= false;
468 exit_status
= EXIT_SUCCESS
;
469 convert_entire_line
= false;
473 while ((c
= getopt_long (argc
, argv
, ",0123456789at:", longopts
, NULL
))
479 usage (EXIT_FAILURE
);
481 convert_entire_line
= true;
484 convert_entire_line
= true;
485 parse_tab_stops (optarg
);
487 case CONVERT_FIRST_ONLY_OPTION
:
488 convert_first_only
= true;
492 add_tab_stop (tabval
);
495 case_GETOPT_HELP_CHAR
;
496 case_GETOPT_VERSION_CHAR (PROGRAM_NAME
, AUTHORS
);
503 if (!DECIMAL_DIGIT_ACCUMULATE (tabval
, c
- '0', uintmax_t))
504 error (EXIT_FAILURE
, 0, _("tab stop value is too large"));
509 if (convert_first_only
)
510 convert_entire_line
= false;
513 add_tab_stop (tabval
);
515 validate_tab_stops (tab_list
, first_free_tab
);
517 if (first_free_tab
== 0)
518 tab_size
= max_column_width
= 8;
519 else if (first_free_tab
== 1)
520 tab_size
= tab_list
[0];
524 file_list
= (optind
< argc
? &argv
[optind
] : stdin_argv
);
528 if (have_read_stdin
&& fclose (stdin
) != 0)
529 error (EXIT_FAILURE
, errno
, "-");