expand: check for colno overflow
[coreutils.git] / src / unexpand.c
blob0d10ffe393edd142658ae69cac59a11706a79b36
/* unexpand - convert blanks to tabs
   Copyright (C) 1989-2024 Free Software Foundation, Inc.

   This program is free software: you can redistribute it and/or modify
   it under the terms of the GNU General Public License as published by
   the Free Software Foundation, either version 3 of the License, or
   (at your option) any later version.

   This program is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
   GNU General Public License for more details.

   You should have received a copy of the GNU General Public License
   along with this program.  If not, see <https://www.gnu.org/licenses/>.  */
/* By default, convert only maximal strings of initial blanks and tabs
   into tabs.
   Preserves backspace characters in the output; they decrement the
   column count for tab calculations.
   The default action is equivalent to -8.

   Options:
   --tabs=tab1[,tab2[,...]]
   -t tab1[,tab2[,...]]
   -tab1[,tab2[,...]]   If only one tab stop is given, set the tabs tab1
                        columns apart instead of the default 8.  Otherwise,
                        set the tabs at columns tab1, tab2, etc. (numbered from
                        0); preserve any blanks beyond the tab stops given.
   --all
   -a                   Use tabs wherever they would replace 2 or more blanks,
                        not just at the beginnings of lines.

   David MacKenzie <djm@gnu.ai.mit.edu> */
36 #include <config.h>
38 #include <ctype.h>
39 #include <stdio.h>
40 #include <getopt.h>
41 #include <sys/types.h>
42 #include "system.h"
43 #include "expand-common.h"
/* The official name of this program (e.g., no 'g' prefix).
   It is passed to emit_ancillary_info () by usage () and to
   case_GETOPT_VERSION_CHAR () by main ().  */
#define PROGRAM_NAME "unexpand"

/* Author credit; passed to case_GETOPT_VERSION_CHAR () by main ().  */
#define AUTHORS proper_name ("David MacKenzie")
/* For long options that have no equivalent short option, use a
   non-character as a pseudo short option, starting with CHAR_MAX + 1.
   Values above CHAR_MAX cannot collide with any single-character
   option code handled by the option switch.  */
enum
{
  /* Code reported for --first-only.  */
  CONVERT_FIRST_ONLY_OPTION = CHAR_MAX + 1
};
59 static struct option const longopts[] =
61 {"tabs", required_argument, nullptr, 't'},
62 {"all", no_argument, nullptr, 'a'},
63 {"first-only", no_argument, nullptr, CONVERT_FIRST_ONLY_OPTION},
64 {GETOPT_HELP_OPTION_DECL},
65 {GETOPT_VERSION_OPTION_DECL},
66 {nullptr, 0, nullptr, 0}
69 void
70 usage (int status)
72 if (status != EXIT_SUCCESS)
73 emit_try_help ();
74 else
76 printf (_("\
77 Usage: %s [OPTION]... [FILE]...\n\
78 "),
79 program_name);
80 fputs (_("\
81 Convert blanks in each FILE to tabs, writing to standard output.\n\
82 "), stdout);
84 emit_stdin_note ();
85 emit_mandatory_arg_note ();
87 fputs (_("\
88 -a, --all convert all blanks, instead of just initial blanks\n\
89 --first-only convert only leading sequences of blanks (overrides -a)\n\
90 -t, --tabs=N have tabs N characters apart instead of 8 (enables -a)\n\
91 "), stdout);
92 emit_tab_list_info ();
93 fputs (HELP_OPTION_DESCRIPTION, stdout);
94 fputs (VERSION_OPTION_DESCRIPTION, stdout);
95 emit_ancillary_info (PROGRAM_NAME);
97 exit (status);
100 /* Change blanks to tabs, writing to stdout.
101 Read each file in 'file_list', in order. */
103 static void
104 unexpand (void)
106 /* Input stream. */
107 FILE *fp = next_file (nullptr);
109 /* The array of pending blanks. In non-POSIX locales, blanks can
110 include characters other than spaces, so the blanks must be
111 stored, not merely counted. */
112 char *pending_blank;
114 if (!fp)
115 return;
117 /* The worst case is a non-blank character, then one blank, then a
118 tab stop, then MAX_COLUMN_WIDTH - 1 blanks, then a non-blank; so
119 allocate MAX_COLUMN_WIDTH bytes to store the blanks. */
120 pending_blank = ximalloc (max_column_width);
122 while (true)
124 /* Input character, or EOF. */
125 int c;
127 /* If true, perform translations. */
128 bool convert = true;
131 /* The following variables have valid values only when CONVERT
132 is true: */
134 /* Column of next input character. */
135 colno column = 0;
137 /* Column the next input tab stop is on. */
138 colno next_tab_column = 0;
140 /* Index in TAB_LIST of next tab stop to examine. */
141 idx_t tab_index = 0;
143 /* If true, the first pending blank came just before a tab stop. */
144 bool one_blank_before_tab_stop = false;
146 /* If true, the previous input character was a blank. This is
147 initially true, since initial strings of blanks are treated
148 as if the line was preceded by a blank. */
149 bool prev_blank = true;
151 /* Number of pending columns of blanks. */
152 idx_t pending = 0;
155 /* Convert a line of text. */
159 while ((c = getc (fp)) < 0 && (fp = next_file (fp)))
160 continue;
162 if (convert)
164 bool blank = !! isblank (c);
166 if (blank)
168 bool last_tab;
170 next_tab_column = get_next_tab_column (column, &tab_index,
171 &last_tab);
173 if (last_tab)
174 convert = false;
176 if (convert)
178 if (c == '\t')
180 column = next_tab_column;
182 if (pending)
183 pending_blank[0] = '\t';
185 else
187 column++;
189 if (! (prev_blank && column == next_tab_column))
191 /* It is not yet known whether the pending blanks
192 will be replaced by tabs. */
193 if (column == next_tab_column)
194 one_blank_before_tab_stop = true;
195 pending_blank[pending++] = c;
196 prev_blank = true;
197 continue;
200 /* Replace the pending blanks by a tab or two. */
201 pending_blank[0] = c = '\t';
204 /* Discard pending blanks, unless it was a single
205 blank just before the previous tab stop. */
206 pending = one_blank_before_tab_stop;
209 else if (c == '\b')
211 /* Go back one column, and force recalculation of the
212 next tab stop. */
213 column -= !!column;
214 next_tab_column = column;
215 tab_index -= !!tab_index;
217 else
219 column++;
220 if (!column)
221 error (EXIT_FAILURE, 0, _("input line is too long"));
224 if (pending)
226 if (pending > 1 && one_blank_before_tab_stop)
227 pending_blank[0] = '\t';
228 if (fwrite (pending_blank, 1, pending, stdout) != pending)
229 write_error ();
230 pending = 0;
231 one_blank_before_tab_stop = false;
234 prev_blank = blank;
235 convert &= convert_entire_line || blank;
238 if (c < 0)
240 free (pending_blank);
241 return;
244 if (putchar (c) < 0)
245 write_error ();
247 while (c != '\n');
252 main (int argc, char **argv)
254 bool have_tabval = false;
255 colno tabval IF_LINT ( = 0);
256 int c;
258 /* If true, cancel the effect of any -a (explicit or implicit in -t),
259 so that only leading blanks will be considered. */
260 bool convert_first_only = false;
262 initialize_main (&argc, &argv);
263 set_program_name (argv[0]);
264 setlocale (LC_ALL, "");
265 bindtextdomain (PACKAGE, LOCALEDIR);
266 textdomain (PACKAGE);
268 atexit (close_stdout);
270 while ((c = getopt_long (argc, argv, ",0123456789at:", longopts, nullptr))
271 != -1)
273 switch (c)
275 case '?':
276 usage (EXIT_FAILURE);
277 case 'a':
278 convert_entire_line = true;
279 break;
280 case 't':
281 convert_entire_line = true;
282 parse_tab_stops (optarg);
283 break;
284 case CONVERT_FIRST_ONLY_OPTION:
285 convert_first_only = true;
286 break;
287 case ',':
288 if (have_tabval)
289 add_tab_stop (tabval);
290 have_tabval = false;
291 break;
292 case_GETOPT_HELP_CHAR;
293 case_GETOPT_VERSION_CHAR (PROGRAM_NAME, AUTHORS);
294 default:
295 if (!have_tabval)
297 tabval = 0;
298 have_tabval = true;
300 if (!DECIMAL_DIGIT_ACCUMULATE (tabval, c - '0'))
301 error (EXIT_FAILURE, 0, _("tab stop value is too large"));
302 break;
306 if (convert_first_only)
307 convert_entire_line = false;
309 if (have_tabval)
310 add_tab_stop (tabval);
312 finalize_tab_stops ();
314 set_file_list ((optind < argc) ? &argv[optind] : nullptr);
316 unexpand ();
318 cleanup_file_list_stdin ();
320 return exit_status;