.
[coreutils.git] / src / unexpand.c
blobc65c210a932f3b132fbd7b38e7ac285db5a96850
/* unexpand - convert spaces to tabs
   Copyright (C) 89, 91, 1995-2003 Free Software Foundation, Inc.

   This program is free software; you can redistribute it and/or modify
   it under the terms of the GNU General Public License as published by
   the Free Software Foundation; either version 2, or (at your option)
   any later version.

   This program is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
   GNU General Public License for more details.

   You should have received a copy of the GNU General Public License
   along with this program; if not, write to the Free Software Foundation,
   Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.  */
/* By default, convert only maximal strings of initial blanks and tabs
   into tabs.
   Preserves backspace characters in the output; they decrement the
   column count for tab calculations.
   The default action is equivalent to -8.

   Options:
   --tabs=tab1[,tab2[,...]]
   -t tab1[,tab2[,...]]
   -tab1[,tab2[,...]]   If only one tab stop is given, set the tabs tab1
                        spaces apart instead of the default 8.  Otherwise,
                        set the tabs at columns tab1, tab2, etc. (numbered
                        from 0); replace any tabs beyond the tabstops given
                        with single spaces.
   --all
   -a                   Use tabs wherever they would replace 2 or more spaces,
                        not just at the beginnings of lines.

   David MacKenzie <djm@gnu.ai.mit.edu> */
38 #include <config.h>
40 #include <stdio.h>
41 #include <getopt.h>
42 #include <sys/types.h>
43 #include "system.h"
44 #include "error.h"
45 #include "posixver.h"
/* Canonical program name, without any installation prefix such as `g'.  */
#define PROGRAM_NAME "unexpand"

#define AUTHORS "David MacKenzie"

/* Growth increment, in bytes, for the memory allocated to the
   output line.  */
#define OUTPUT_BLOCK 256

/* Growth increment, in bytes, for the memory allocated to the
   list of tabstops.  */
#define TABLIST_BLOCK 256

/* Marker stored after the last real tab stop.  It has to be large,
   yet small enough that adding a line length to it cannot overflow
   the column variable.  */
#define TAB_STOP_SENTINEL INT_MAX

/* Name under which this program was invoked.  */
char *program_name;

/* Nonzero means blanks are converted even when they follow nonblank
   characters on the line.  */
static int convert_entire_line;

/* Distance between tab stops when nonzero; when zero, the stops are
   taken from `tab_list'.  */
static int tab_size;

/* Explicit tab stop columns, counted from column 0.  Once the list is
   exhausted the remainder of the line is printed unchanged.  */
static int *tab_list;
static size_t n_tabs_allocated;

/* Index of the first unused slot of `tab_list', i.e. where the next
   tab stop will be stored.  */
static int first_free_tab;

/* Null-terminated array of the input file names.  */
static char **file_list;

/* Value used for `file_list' when the command line names no files.  */
static char *stdin_argv[] = { "-", NULL };

/* Nonzero once standard input has been read.  */
static int have_read_stdin;

/* Exit status to hand back to the system.  */
static int exit_status;

100 /* For long options that have no equivalent short option, use a
101 non-character as a pseudo short option, starting with CHAR_MAX + 1. */
102 enum
104 CONVERT_FIRST_ONLY_OPTION = CHAR_MAX + 1
107 static struct option const longopts[] =
109 {"tabs", required_argument, NULL, 't'},
110 {"all", no_argument, NULL, 'a'},
111 {"first-only", no_argument, NULL, CONVERT_FIRST_ONLY_OPTION},
112 {GETOPT_HELP_OPTION_DECL},
113 {GETOPT_VERSION_OPTION_DECL},
114 {NULL, 0, NULL, 0}
117 /* Add tab stop TABVAL to the end of `tab_list', except
118 if TABVAL is -1, do nothing. */
120 static void
121 add_tabstop (int tabval)
123 if (tabval == -1)
124 return;
125 if (first_free_tab == n_tabs_allocated)
126 tab_list = x2nrealloc (tab_list, &n_tabs_allocated, sizeof *tab_list);
127 tab_list[first_free_tab++] = tabval;
/* Add the comma or blank separated list of tabstops STOPS
   to the list of tabstops.

   Each run of decimal digits forms one tab stop.  A separator with no
   preceding digits contributes nothing, because the accumulator is then
   still -1 and add_tabstop ignores that value.  Any character that is
   neither a digit nor a separator is a fatal error, as is a number too
   large to be represented in an int.  */

static void
parse_tabstops (const char *stops)
{
  int tabval = -1;              /* Value being accumulated, or -1 if none.  */

  for (; *stops; stops++)
    {
      /* Work on an unsigned copy so a character classification macro is
         never handed a negative value.  */
      unsigned char ch = *stops;

      if (ch == ',' || ISBLANK (ch))
        {
          add_tabstop (tabval);
          tabval = -1;
        }
      else if (ISDIGIT (ch))
        {
          int digit = ch - '0';

          if (tabval == -1)
            tabval = 0;

          /* Refuse the digit if appending it would overflow; signed
             overflow is undefined behavior.  */
          if (tabval > (INT_MAX - digit) / 10)
            error (EXIT_FAILURE, 0, _("tab stop is too large"));

          tabval = tabval * 10 + digit;
        }
      else
        error (EXIT_FAILURE, 0, _("tab size contains an invalid character"));
    }

  /* Store the final number, which has no trailing separator.  */
  add_tabstop (tabval);
}

/* Verify that the ENTRIES tab stops in TABS are all nonzero and
   strictly increasing.  The first violation found is reported with
   error and an EXIT_FAILURE status.  */

static void
validate_tabstops (const int *tabs, int entries)
{
  int last = 0;                 /* Previous tab stop; 0 before the first.  */
  int k = 0;

  while (k < entries)
    {
      int stop = tabs[k++];

      if (stop == 0)
        error (EXIT_FAILURE, 0, _("tab size cannot be 0"));
      if (stop <= last)
        error (EXIT_FAILURE, 0, _("tab sizes must be ascending"));
      last = stop;
    }
}

177 /* Close the old stream pointer FP if it is non-NULL,
178 and return a new one opened to read the next input file.
179 Open a filename of `-' as the standard input.
180 Return NULL if there are no more input files. */
182 static FILE *
183 next_file (FILE *fp)
185 static char *prev_file;
186 char *file;
188 if (fp)
190 if (ferror (fp))
192 error (0, errno, "%s", prev_file);
193 exit_status = 1;
195 if (fp == stdin)
196 clearerr (fp); /* Also clear EOF. */
197 else if (fclose (fp) == EOF)
199 error (0, errno, "%s", prev_file);
200 exit_status = 1;
204 while ((file = *file_list++) != NULL)
206 if (file[0] == '-' && file[1] == '\0')
208 have_read_stdin = 1;
209 prev_file = file;
210 return stdin;
212 fp = fopen (file, "r");
213 if (fp)
215 prev_file = file;
216 return fp;
218 error (0, errno, "%s", file);
219 exit_status = 1;
221 return NULL;
224 /* Change spaces to tabs, writing to stdout.
225 Read each file in `file_list', in order. */
227 static void
228 unexpand (void)
230 FILE *fp; /* Input stream. */
231 int c; /* Each input character. */
232 /* Index in `tab_list' of next tabstop: */
233 int tab_index = 0; /* For calculating width of pending tabs. */
234 int print_tab_index = 0; /* For printing as many tabs as possible. */
235 unsigned int column = 0; /* Column on screen of next char. */
236 int next_tab_column; /* Column the next tab stop is on. */
237 int convert = 1; /* If nonzero, perform translations. */
238 unsigned int pending = 0; /* Pending columns of blanks. */
239 int saved_errno;
241 fp = next_file ((FILE *) NULL);
242 if (fp == NULL)
243 return;
245 /* Binary I/O will preserve the original EOL style (DOS/Unix) of files. */
246 SET_BINARY2 (fileno (fp), STDOUT_FILENO);
248 for (;;)
250 c = getc (fp);
251 saved_errno = errno;
253 if (c == ' ' && convert && column < TAB_STOP_SENTINEL)
255 ++pending;
256 ++column;
258 else if (c == '\t' && convert)
260 if (tab_size == 0)
262 /* Do not let tab_index == first_free_tab;
263 stop when it is 1 less. */
264 while (tab_index < first_free_tab - 1
265 && column >= tab_list[tab_index])
266 tab_index++;
267 next_tab_column = tab_list[tab_index];
268 if (tab_index < first_free_tab - 1)
269 tab_index++;
270 if (column >= next_tab_column)
272 convert = 0; /* Ran out of tab stops. */
273 goto flush_pend;
276 else
278 next_tab_column = column + tab_size - column % tab_size;
280 pending += next_tab_column - column;
281 column = next_tab_column;
283 else
285 flush_pend:
286 /* Flush pending spaces. Print as many tabs as possible,
287 then print the rest as spaces. */
288 if (pending == 1)
290 putchar (' ');
291 pending = 0;
293 column -= pending;
294 while (pending > 0)
296 if (tab_size == 0)
298 /* Do not let print_tab_index == first_free_tab;
299 stop when it is 1 less. */
300 while (print_tab_index < first_free_tab - 1
301 && column >= tab_list[print_tab_index])
302 print_tab_index++;
303 next_tab_column = tab_list[print_tab_index];
304 if (print_tab_index < first_free_tab - 1)
305 print_tab_index++;
307 else
309 next_tab_column = column + tab_size - column % tab_size;
311 if (next_tab_column - column <= pending)
313 putchar ('\t');
314 pending -= next_tab_column - column;
315 column = next_tab_column;
317 else
319 --print_tab_index;
320 column += pending;
321 while (pending != 0)
323 putchar (' ');
324 pending--;
329 if (c == EOF)
331 errno = saved_errno;
332 fp = next_file (fp);
333 if (fp == NULL)
334 break; /* No more files. */
335 else
337 SET_BINARY2 (fileno (fp), STDOUT_FILENO);
338 continue;
342 if (convert)
344 if (c == '\b')
346 if (column > 0)
347 --column;
349 else
351 ++column;
352 if (convert_entire_line == 0)
353 convert = 0;
357 putchar (c);
359 if (c == '\n')
361 tab_index = print_tab_index = 0;
362 column = pending = 0;
363 convert = 1;
369 void
370 usage (int status)
372 if (status != 0)
373 fprintf (stderr, _("Try `%s --help' for more information.\n"),
374 program_name);
375 else
377 printf (_("\
378 Usage: %s [OPTION]... [FILE]...\n\
380 program_name);
381 fputs (_("\
382 Convert spaces in each FILE to tabs, writing to standard output.\n\
383 With no FILE, or when FILE is -, read standard input.\n\
385 "), stdout);
386 fputs (_("\
387 Mandatory arguments to long options are mandatory for short options too.\n\
388 "), stdout);
389 fputs (_("\
390 -a, --all convert all whitespace, instead of just initial whitespace\n\
391 --first-only convert only leading sequences of whitespace (overrides -a)\n\
392 -t, --tabs=N have tabs N characters apart instead of 8 (enables -a)\n\
393 -t, --tabs=LIST use comma separated LIST of tab positions (enables -a)\n\
394 "), stdout);
395 fputs (HELP_OPTION_DESCRIPTION, stdout);
396 fputs (VERSION_OPTION_DESCRIPTION, stdout);
397 printf (_("\nReport bugs to <%s>.\n"), PACKAGE_BUGREPORT);
399 exit (status == 0 ? EXIT_SUCCESS : EXIT_FAILURE);
403 main (int argc, char **argv)
405 int tabval = -1; /* Value of tabstop being read, or -1. */
406 int c; /* Option character. */
408 /* If nonzero, cancel the effect of any -a (explicit or implicit in -t),
409 so that only leading white space will be considered. */
410 int convert_first_only = 0;
412 bool obsolete_tablist = false;
414 initialize_main (&argc, &argv);
415 program_name = argv[0];
416 setlocale (LC_ALL, "");
417 bindtextdomain (PACKAGE, LOCALEDIR);
418 textdomain (PACKAGE);
420 atexit (close_stdout);
422 have_read_stdin = 0;
423 exit_status = 0;
424 convert_entire_line = 0;
425 tab_list = NULL;
426 first_free_tab = 0;
428 while ((c = getopt_long (argc, argv, ",0123456789at:", longopts, NULL))
429 != -1)
431 switch (c)
433 case 0:
434 break;
436 case '?':
437 usage (EXIT_FAILURE);
438 case 'a':
439 convert_entire_line = 1;
440 break;
441 case 't':
442 convert_entire_line = 1;
443 parse_tabstops (optarg);
444 break;
445 case CONVERT_FIRST_ONLY_OPTION:
446 convert_first_only = 1;
447 break;
448 case ',':
449 add_tabstop (tabval);
450 tabval = -1;
451 obsolete_tablist = true;
452 break;
453 case_GETOPT_HELP_CHAR;
454 case_GETOPT_VERSION_CHAR (PROGRAM_NAME, AUTHORS);
455 default:
456 if (tabval == -1)
457 tabval = 0;
458 tabval = tabval * 10 + c - '0';
459 obsolete_tablist = true;
460 break;
464 if (obsolete_tablist && 200112 <= posix2_version ())
466 error (0, 0,
467 _("`-LIST' option is obsolete; use `--first-only -t LIST'"));
468 usage (EXIT_FAILURE);
471 if (convert_first_only)
472 convert_entire_line = 0;
474 add_tabstop (tabval);
476 validate_tabstops (tab_list, first_free_tab);
478 if (first_free_tab == 0)
479 tab_size = 8;
480 else if (first_free_tab == 1)
481 tab_size = tab_list[0];
482 else
484 /* Append a sentinel to the list of tab stop indices. */
485 add_tabstop (TAB_STOP_SENTINEL);
486 tab_size = 0;
489 file_list = (optind < argc ? &argv[optind] : stdin_argv);
491 unexpand ();
493 if (have_read_stdin && fclose (stdin) == EOF)
494 error (EXIT_FAILURE, errno, "-");
495 exit (exit_status == 0 ? EXIT_SUCCESS : EXIT_FAILURE);