tests: unpack xz-compressed tarballs when possible, not always *.gz
[coreutils.git] / src / unexpand.c
blob cfbc46bd7005670922b6ceb9b3c67d64c1cc69ab
1 /* unexpand - convert blanks to tabs
2 Copyright (C) 89, 91, 1995-2006, 2008-2009 Free Software Foundation, Inc.
4 This program is free software: you can redistribute it and/or modify
5 it under the terms of the GNU General Public License as published by
6 the Free Software Foundation, either version 3 of the License, or
7 (at your option) any later version.
9 This program is distributed in the hope that it will be useful,
10 but WITHOUT ANY WARRANTY; without even the implied warranty of
11 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 GNU General Public License for more details.
14 You should have received a copy of the GNU General Public License
15 along with this program. If not, see <http://www.gnu.org/licenses/>. */
17 /* By default, convert only maximal strings of initial blanks and tabs
18 into tabs.
19 Preserves backspace characters in the output; they decrement the
20 column count for tab calculations.
21 The default action is equivalent to -8.
23 Options:
24 --tabs=tab1[,tab2[,...]]
25 -t tab1[,tab2[,...]]
26 -tab1[,tab2[,...]] If only one tab stop is given, set the tabs tab1
27 columns apart instead of the default 8. Otherwise,
28 set the tabs at columns tab1, tab2, etc. (numbered from
29 0); preserve any blanks beyond the tab stops given.
30 --all
31 -a Use tabs wherever they would replace 2 or more blanks,
32 not just at the beginnings of lines.
34 David MacKenzie <djm@gnu.ai.mit.edu> */
36 #include <config.h>
38 #include <stdio.h>
39 #include <getopt.h>
40 #include <sys/types.h>
41 #include "system.h"
42 #include "error.h"
43 #include "quote.h"
44 #include "xstrndup.h"
/* The official name of this program (e.g., no `g' prefix).  */
#define PROGRAM_NAME "unexpand"

#define AUTHORS proper_name ("David MacKenzie")

/* If true, convert blanks even after nonblank characters have been
   read on the line.  */
static bool convert_entire_line;

/* If nonzero, the size of all tab stops.  If zero, use `tab_list' instead.  */
static size_t tab_size;

/* The maximum distance between tab stops.  It also determines how many
   bytes are allocated for the buffer of pending blanks.  */
static size_t max_column_width;

/* Array of the explicit column numbers of the tab stops;
   after `tab_list' is exhausted, the rest of the line is printed
   unchanged.  The first column is column 0.  */
static uintmax_t *tab_list;

/* The number of allocated entries in `tab_list'.  */
static size_t n_tabs_allocated;

/* The index of the first invalid element of `tab_list',
   where the next element can be added.  */
static size_t first_free_tab;

/* Null-terminated array of input filenames.  The pointer is advanced
   as files are consumed.  */
static char **file_list;

/* Default for `file_list' if no files are given on the command line:
   the single name "-" followed by the terminating NULL.  */
static char *stdin_argv[] =
{
  (char *) "-", NULL
};

/* True if we have ever read standard input.  */
static bool have_read_stdin;

/* The desired exit status.  */
static int exit_status;
88 /* For long options that have no equivalent short option, use a
89 non-character as a pseudo short option, starting with CHAR_MAX + 1. */
90 enum
92 CONVERT_FIRST_ONLY_OPTION = CHAR_MAX + 1
95 static struct option const longopts[] =
97 {"tabs", required_argument, NULL, 't'},
98 {"all", no_argument, NULL, 'a'},
99 {"first-only", no_argument, NULL, CONVERT_FIRST_ONLY_OPTION},
100 {GETOPT_HELP_OPTION_DECL},
101 {GETOPT_VERSION_OPTION_DECL},
102 {NULL, 0, NULL, 0}
105 void
106 usage (int status)
108 if (status != EXIT_SUCCESS)
109 fprintf (stderr, _("Try `%s --help' for more information.\n"),
110 program_name);
111 else
113 printf (_("\
114 Usage: %s [OPTION]... [FILE]...\n\
116 program_name);
117 fputs (_("\
118 Convert blanks in each FILE to tabs, writing to standard output.\n\
119 With no FILE, or when FILE is -, read standard input.\n\
121 "), stdout);
122 fputs (_("\
123 Mandatory arguments to long options are mandatory for short options too.\n\
124 "), stdout);
125 fputs (_("\
126 -a, --all convert all blanks, instead of just initial blanks\n\
127 --first-only convert only leading sequences of blanks (overrides -a)\n\
128 -t, --tabs=N have tabs N characters apart instead of 8 (enables -a)\n\
129 -t, --tabs=LIST use comma separated LIST of tab positions (enables -a)\n\
130 "), stdout);
131 fputs (HELP_OPTION_DESCRIPTION, stdout);
132 fputs (VERSION_OPTION_DESCRIPTION, stdout);
133 emit_ancillary_info ();
135 exit (status);
138 /* Add tab stop TABVAL to the end of `tab_list'. */
140 static void
141 add_tab_stop (uintmax_t tabval)
143 uintmax_t prev_column = first_free_tab ? tab_list[first_free_tab - 1] : 0;
144 uintmax_t column_width = prev_column <= tabval ? tabval - prev_column : 0;
146 if (first_free_tab == n_tabs_allocated)
147 tab_list = X2NREALLOC (tab_list, &n_tabs_allocated);
148 tab_list[first_free_tab++] = tabval;
150 if (max_column_width < column_width)
152 if (SIZE_MAX < column_width)
153 error (EXIT_FAILURE, 0, _("tabs are too far apart"));
154 max_column_width = column_width;
158 /* Add the comma or blank separated list of tab stops STOPS
159 to the list of tab stops. */
161 static void
162 parse_tab_stops (char const *stops)
164 bool have_tabval = false;
165 uintmax_t tabval IF_LINT (= 0);
166 char const *num_start IF_LINT (= NULL);
167 bool ok = true;
169 for (; *stops; stops++)
171 if (*stops == ',' || isblank (to_uchar (*stops)))
173 if (have_tabval)
174 add_tab_stop (tabval);
175 have_tabval = false;
177 else if (ISDIGIT (*stops))
179 if (!have_tabval)
181 tabval = 0;
182 have_tabval = true;
183 num_start = stops;
186 /* Detect overflow. */
187 if (!DECIMAL_DIGIT_ACCUMULATE (tabval, *stops - '0', uintmax_t))
189 size_t len = strspn (num_start, "0123456789");
190 char *bad_num = xstrndup (num_start, len);
191 error (0, 0, _("tab stop is too large %s"), quote (bad_num));
192 free (bad_num);
193 ok = false;
194 stops = num_start + len - 1;
197 else
199 error (0, 0, _("tab size contains invalid character(s): %s"),
200 quote (stops));
201 ok = false;
202 break;
206 if (!ok)
207 exit (EXIT_FAILURE);
209 if (have_tabval)
210 add_tab_stop (tabval);
/* Verify that the ENTRIES tab stops stored at TABS are all nonzero and
   strictly increasing.  Exit with a diagnostic on the first stop that
   breaks either rule.  Nothing is read from TABS when ENTRIES is 0.  */

static void
validate_tab_stops (uintmax_t const *tabs, size_t entries)
{
  uintmax_t previous = 0;

  while (entries-- != 0)
    {
      uintmax_t current = *tabs++;

      if (current == 0)
        error (EXIT_FAILURE, 0, _("tab size cannot be 0"));
      if (current <= previous)
        error (EXIT_FAILURE, 0, _("tab sizes must be ascending"));
      previous = current;
    }
}
232 /* Close the old stream pointer FP if it is non-NULL,
233 and return a new one opened to read the next input file.
234 Open a filename of `-' as the standard input.
235 Return NULL if there are no more input files. */
237 static FILE *
238 next_file (FILE *fp)
240 static char *prev_file;
241 char *file;
243 if (fp)
245 if (ferror (fp))
247 error (0, errno, "%s", prev_file);
248 exit_status = EXIT_FAILURE;
250 if (STREQ (prev_file, "-"))
251 clearerr (fp); /* Also clear EOF. */
252 else if (fclose (fp) != 0)
254 error (0, errno, "%s", prev_file);
255 exit_status = EXIT_FAILURE;
259 while ((file = *file_list++) != NULL)
261 if (STREQ (file, "-"))
263 have_read_stdin = true;
264 prev_file = file;
265 return stdin;
267 fp = fopen (file, "r");
268 if (fp)
270 prev_file = file;
271 return fp;
273 error (0, errno, "%s", file);
274 exit_status = EXIT_FAILURE;
276 return NULL;
279 /* Change blanks to tabs, writing to stdout.
280 Read each file in `file_list', in order. */
282 static void
283 unexpand (void)
285 /* Input stream. */
286 FILE *fp = next_file (NULL);
288 /* The array of pending blanks. In non-POSIX locales, blanks can
289 include characters other than spaces, so the blanks must be
290 stored, not merely counted. */
291 char *pending_blank;
293 if (!fp)
294 return;
296 /* The worst case is a non-blank character, then one blank, then a
297 tab stop, then MAX_COLUMN_WIDTH - 1 blanks, then a non-blank; so
298 allocate MAX_COLUMN_WIDTH bytes to store the blanks. */
299 pending_blank = xmalloc (max_column_width);
301 for (;;)
303 /* Input character, or EOF. */
304 int c;
306 /* If true, perform translations. */
307 bool convert = true;
310 /* The following variables have valid values only when CONVERT
311 is true: */
313 /* Column of next input character. */
314 uintmax_t column = 0;
316 /* Column the next input tab stop is on. */
317 uintmax_t next_tab_column = 0;
319 /* Index in TAB_LIST of next tab stop to examine. */
320 size_t tab_index = 0;
322 /* If true, the first pending blank came just before a tab stop. */
323 bool one_blank_before_tab_stop = false;
325 /* If true, the previous input character was a blank. This is
326 initially true, since initial strings of blanks are treated
327 as if the line was preceded by a blank. */
328 bool prev_blank = true;
330 /* Number of pending columns of blanks. */
331 size_t pending = 0;
334 /* Convert a line of text. */
338 while ((c = getc (fp)) < 0 && (fp = next_file (fp)))
339 continue;
341 if (convert)
343 bool blank = !! isblank (c);
345 if (blank)
347 if (next_tab_column <= column)
349 if (tab_size)
350 next_tab_column =
351 column + (tab_size - column % tab_size);
352 else
353 for (;;)
354 if (tab_index == first_free_tab)
356 convert = false;
357 break;
359 else
361 uintmax_t tab = tab_list[tab_index++];
362 if (column < tab)
364 next_tab_column = tab;
365 break;
370 if (convert)
372 if (next_tab_column < column)
373 error (EXIT_FAILURE, 0, _("input line is too long"));
375 if (c == '\t')
377 column = next_tab_column;
379 /* Discard pending blanks, unless it was a single
380 blank just before the previous tab stop. */
381 if (! (pending == 1 && one_blank_before_tab_stop))
383 pending = 0;
384 one_blank_before_tab_stop = false;
387 else
389 column++;
391 if (! (prev_blank && column == next_tab_column))
393 /* It is not yet known whether the pending blanks
394 will be replaced by tabs. */
395 if (column == next_tab_column)
396 one_blank_before_tab_stop = true;
397 pending_blank[pending++] = c;
398 prev_blank = true;
399 continue;
402 /* Replace the pending blanks by a tab or two. */
403 pending_blank[0] = c = '\t';
404 pending = one_blank_before_tab_stop;
408 else if (c == '\b')
410 /* Go back one column, and force recalculation of the
411 next tab stop. */
412 column -= !!column;
413 next_tab_column = column;
414 tab_index -= !!tab_index;
416 else
418 column++;
419 if (!column)
420 error (EXIT_FAILURE, 0, _("input line is too long"));
423 if (pending)
425 if (fwrite (pending_blank, 1, pending, stdout) != pending)
426 error (EXIT_FAILURE, errno, _("write error"));
427 pending = 0;
428 one_blank_before_tab_stop = false;
431 prev_blank = blank;
432 convert &= convert_entire_line || blank;
435 if (c < 0)
437 free (pending_blank);
438 return;
441 if (putchar (c) < 0)
442 error (EXIT_FAILURE, errno, _("write error"));
444 while (c != '\n');
449 main (int argc, char **argv)
451 bool have_tabval = false;
452 uintmax_t tabval IF_LINT (= 0);
453 int c;
455 /* If true, cancel the effect of any -a (explicit or implicit in -t),
456 so that only leading blanks will be considered. */
457 bool convert_first_only = false;
459 initialize_main (&argc, &argv);
460 set_program_name (argv[0]);
461 setlocale (LC_ALL, "");
462 bindtextdomain (PACKAGE, LOCALEDIR);
463 textdomain (PACKAGE);
465 atexit (close_stdout);
467 have_read_stdin = false;
468 exit_status = EXIT_SUCCESS;
469 convert_entire_line = false;
470 tab_list = NULL;
471 first_free_tab = 0;
473 while ((c = getopt_long (argc, argv, ",0123456789at:", longopts, NULL))
474 != -1)
476 switch (c)
478 case '?':
479 usage (EXIT_FAILURE);
480 case 'a':
481 convert_entire_line = true;
482 break;
483 case 't':
484 convert_entire_line = true;
485 parse_tab_stops (optarg);
486 break;
487 case CONVERT_FIRST_ONLY_OPTION:
488 convert_first_only = true;
489 break;
490 case ',':
491 if (have_tabval)
492 add_tab_stop (tabval);
493 have_tabval = false;
494 break;
495 case_GETOPT_HELP_CHAR;
496 case_GETOPT_VERSION_CHAR (PROGRAM_NAME, AUTHORS);
497 default:
498 if (!have_tabval)
500 tabval = 0;
501 have_tabval = true;
503 if (!DECIMAL_DIGIT_ACCUMULATE (tabval, c - '0', uintmax_t))
504 error (EXIT_FAILURE, 0, _("tab stop value is too large"));
505 break;
509 if (convert_first_only)
510 convert_entire_line = false;
512 if (have_tabval)
513 add_tab_stop (tabval);
515 validate_tab_stops (tab_list, first_free_tab);
517 if (first_free_tab == 0)
518 tab_size = max_column_width = 8;
519 else if (first_free_tab == 1)
520 tab_size = tab_list[0];
521 else
522 tab_size = 0;
524 file_list = (optind < argc ? &argv[optind] : stdin_argv);
526 unexpand ();
528 if (have_read_stdin && fclose (stdin) != 0)
529 error (EXIT_FAILURE, errno, "-");
531 exit (exit_status);