version 8.7
[coreutils.git] / src / paste.c
blobdbbf52df5d06b53e9bf59d20d5cca1f18eed003d
1 /* paste - merge lines of files
2 Copyright (C) 1997-2005, 2008-2010 Free Software Foundation, Inc.
3 Copyright (C) 1984 David M. Ihnat
5 This program is free software: you can redistribute it and/or modify
6 it under the terms of the GNU General Public License as published by
7 the Free Software Foundation, either version 3 of the License, or
8 (at your option) any later version.
10 This program is distributed in the hope that it will be useful,
11 but WITHOUT ANY WARRANTY; without even the implied warranty of
12 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 GNU General Public License for more details.
15 You should have received a copy of the GNU General Public License
16 along with this program. If not, see <http://www.gnu.org/licenses/>. */
18 /* Written by David Ihnat. */
20 /* The list of valid escape sequences has been expanded over the Unix
21 version, to include \b, \f, \r, and \v.
23 POSIX changes, bug fixes, long-named options, and cleanup
24 by David MacKenzie <djm@gnu.ai.mit.edu>.
26 Options:
27 --serial
28 -s Paste one file at a time rather than
29 one line from each file.
30 --delimiters=delim-list
31 -d delim-list Consecutively use the characters in
32 DELIM-LIST instead of tab to separate
33 merged lines. When DELIM-LIST is exhausted,
34 start again at its beginning.
35 A FILE of `-' means standard input.
36 If no FILEs are given, standard input is used. */
38 #include <config.h>
40 #include <stdio.h>
41 #include <getopt.h>
42 #include <sys/types.h>
43 #include "system.h"
44 #include "error.h"
45 #include "fadvise.h"
46 #include "quotearg.h"
48 /* The official name of this program (e.g., no `g' prefix). */
49 #define PROGRAM_NAME "paste"
51 #define AUTHORS \
52 proper_name ("David M. Ihnat"), \
53 proper_name ("David MacKenzie")
55 /* Indicates that no delimiter should be added in the current position. */
56 #define EMPTY_DELIM '\0'
58 /* True if we have read standard input at some point. */
59 static bool have_read_stdin;
61 /* If true, merge subsequent lines of each file rather than
62 corresponding lines from each file in parallel. */
63 static bool serial_merge;
65 /* The delimiters between lines of input files (used cyclically). */
66 static char *delims;
68 /* A pointer to the character after the end of `delims'. */
69 static char const *delim_end;
71 static struct option const longopts[] =
73 {"serial", no_argument, NULL, 's'},
74 {"delimiters", required_argument, NULL, 'd'},
75 {GETOPT_HELP_OPTION_DECL},
76 {GETOPT_VERSION_OPTION_DECL},
77 {NULL, 0, NULL, 0}
80 /* Set globals delims and delim_end. Copy STRPTR to DELIMS, converting
81 backslash representations of special characters in STRPTR to their actual
82 values. The set of possible backslash characters has been expanded beyond
83 that recognized by the Unix version.
84 Return 0 upon success.
85 If the string ends in an odd number of backslashes, ignore the
86 final backslash and return nonzero. */
88 static int
89 collapse_escapes (char const *strptr)
91 char *strout = xstrdup (strptr);
92 bool backslash_at_end = false;
94 delims = strout;
96 while (*strptr)
98 if (*strptr != '\\') /* Is it an escape character? */
99 *strout++ = *strptr++; /* No, just transfer it. */
100 else
102 switch (*++strptr)
104 case '0':
105 *strout++ = EMPTY_DELIM;
106 break;
108 case 'b':
109 *strout++ = '\b';
110 break;
112 case 'f':
113 *strout++ = '\f';
114 break;
116 case 'n':
117 *strout++ = '\n';
118 break;
120 case 'r':
121 *strout++ = '\r';
122 break;
124 case 't':
125 *strout++ = '\t';
126 break;
128 case 'v':
129 *strout++ = '\v';
130 break;
132 case '\\':
133 *strout++ = '\\';
134 break;
136 case '\0':
137 backslash_at_end = true;
138 goto done;
140 default:
141 *strout++ = *strptr;
142 break;
144 strptr++;
148 done:;
150 delim_end = strout;
151 return backslash_at_end ? 1 : 0;
154 /* Report a write error and exit. */
156 static void write_error (void) ATTRIBUTE_NORETURN;
157 static void
158 write_error (void)
160 error (EXIT_FAILURE, errno, _("write error"));
161 abort ();
/* Write the byte C to standard output; on failure, report the write
   error and exit via write_error.  */

static inline void
xputchar (char c)
{
  int result = putchar (c);

  if (result < 0)
    write_error ();
}
173 /* Perform column paste on the NFILES files named in FNAMPTR.
174 Return true if successful, false if one or more files could not be
175 opened or read. */
177 static bool
178 paste_parallel (size_t nfiles, char **fnamptr)
180 bool ok = true;
181 /* If all files are just ready to be closed, or will be on this
182 round, the string of delimiters must be preserved.
183 delbuf[0] through delbuf[nfiles]
184 store the delimiters for closed files. */
185 char *delbuf = xmalloc (nfiles + 2);
187 /* Streams open to the files to process; NULL if the corresponding
188 stream is closed. */
189 FILE **fileptr = xnmalloc (nfiles + 1, sizeof *fileptr);
191 /* Number of files still open to process. */
192 size_t files_open;
194 /* True if any fopen got fd == STDIN_FILENO. */
195 bool opened_stdin = false;
197 /* Attempt to open all files. This could be expanded to an infinite
198 number of files, but at the (considerable) expense of remembering
199 each file and its current offset, then opening/reading/closing. */
201 for (files_open = 0; files_open < nfiles; ++files_open)
203 if (STREQ (fnamptr[files_open], "-"))
205 have_read_stdin = true;
206 fileptr[files_open] = stdin;
208 else
210 fileptr[files_open] = fopen (fnamptr[files_open], "r");
211 if (fileptr[files_open] == NULL)
212 error (EXIT_FAILURE, errno, "%s", fnamptr[files_open]);
213 else if (fileno (fileptr[files_open]) == STDIN_FILENO)
214 opened_stdin = true;
215 fadvise (fileptr[files_open], FADVISE_SEQUENTIAL);
219 if (opened_stdin && have_read_stdin)
220 error (EXIT_FAILURE, 0, _("standard input is closed"));
222 /* Read a line from each file and output it to stdout separated by a
223 delimiter, until we go through the loop without successfully
224 reading from any of the files. */
226 while (files_open)
228 /* Set up for the next line. */
229 bool somedone = false;
230 char const *delimptr = delims;
231 size_t delims_saved = 0; /* Number of delims saved in `delbuf'. */
232 size_t i;
234 for (i = 0; i < nfiles && files_open; i++)
236 int chr IF_LINT ( = 0); /* Input character. */
237 int err IF_LINT ( = 0); /* Input errno value. */
238 size_t line_length = 0; /* Number of chars in line. */
240 if (fileptr[i])
242 chr = getc (fileptr[i]);
243 err = errno;
244 if (chr != EOF && delims_saved)
246 if (fwrite (delbuf, 1, delims_saved, stdout) != delims_saved)
247 write_error ();
248 delims_saved = 0;
251 while (chr != EOF)
253 line_length++;
254 if (chr == '\n')
255 break;
256 xputchar (chr);
257 chr = getc (fileptr[i]);
258 err = errno;
262 if (line_length == 0)
264 /* EOF, read error, or closed file.
265 If an EOF or error, close the file. */
266 if (fileptr[i])
268 if (ferror (fileptr[i]))
270 error (0, err, "%s", fnamptr[i]);
271 ok = false;
273 if (fileptr[i] == stdin)
274 clearerr (fileptr[i]); /* Also clear EOF. */
275 else if (fclose (fileptr[i]) == EOF)
277 error (0, errno, "%s", fnamptr[i]);
278 ok = false;
281 fileptr[i] = NULL;
282 files_open--;
285 if (i + 1 == nfiles)
287 /* End of this output line.
288 Is this the end of the whole thing? */
289 if (somedone)
291 /* No. Some files were not closed for this line. */
292 if (delims_saved)
294 if (fwrite (delbuf, 1, delims_saved, stdout)
295 != delims_saved)
296 write_error ();
297 delims_saved = 0;
299 xputchar ('\n');
301 continue; /* Next read of files, or exit. */
303 else
305 /* Closed file; add delimiter to `delbuf'. */
306 if (*delimptr != EMPTY_DELIM)
307 delbuf[delims_saved++] = *delimptr;
308 if (++delimptr == delim_end)
309 delimptr = delims;
312 else
314 /* Some data read. */
315 somedone = true;
317 /* Except for last file, replace last newline with delim. */
318 if (i + 1 != nfiles)
320 if (chr != '\n' && chr != EOF)
321 xputchar (chr);
322 if (*delimptr != EMPTY_DELIM)
323 xputchar (*delimptr);
324 if (++delimptr == delim_end)
325 delimptr = delims;
327 else
329 /* If the last line of the last file lacks a newline,
330 print one anyhow. POSIX requires this. */
331 char c = (chr == EOF ? '\n' : chr);
332 xputchar (c);
337 free (fileptr);
338 free (delbuf);
339 return ok;
342 /* Perform serial paste on the NFILES files named in FNAMPTR.
343 Return true if no errors, false if one or more files could not be
344 opened or read. */
346 static bool
347 paste_serial (size_t nfiles, char **fnamptr)
349 bool ok = true; /* false if open or read errors occur. */
350 int charnew, charold; /* Current and previous char read. */
351 char const *delimptr; /* Current delimiter char. */
352 FILE *fileptr; /* Open for reading current file. */
354 for (; nfiles; nfiles--, fnamptr++)
356 int saved_errno;
357 bool is_stdin = STREQ (*fnamptr, "-");
358 if (is_stdin)
360 have_read_stdin = true;
361 fileptr = stdin;
363 else
365 fileptr = fopen (*fnamptr, "r");
366 if (fileptr == NULL)
368 error (0, errno, "%s", *fnamptr);
369 ok = false;
370 continue;
372 fadvise (fileptr, FADVISE_SEQUENTIAL);
375 delimptr = delims; /* Set up for delimiter string. */
377 charold = getc (fileptr);
378 saved_errno = errno;
379 if (charold != EOF)
381 /* `charold' is set up. Hit it!
382 Keep reading characters, stashing them in `charnew';
383 output `charold', converting to the appropriate delimiter
384 character if needed. After the EOF, output `charold'
385 if it's a newline; otherwise, output it and then a newline. */
387 while ((charnew = getc (fileptr)) != EOF)
389 /* Process the old character. */
390 if (charold == '\n')
392 if (*delimptr != EMPTY_DELIM)
393 xputchar (*delimptr);
395 if (++delimptr == delim_end)
396 delimptr = delims;
398 else
399 xputchar (charold);
401 charold = charnew;
403 saved_errno = errno;
405 /* Hit EOF. Process that last character. */
406 xputchar (charold);
409 if (charold != '\n')
410 xputchar ('\n');
412 if (ferror (fileptr))
414 error (0, saved_errno, "%s", *fnamptr);
415 ok = false;
417 if (is_stdin)
418 clearerr (fileptr); /* Also clear EOF. */
419 else if (fclose (fileptr) == EOF)
421 error (0, errno, "%s", *fnamptr);
422 ok = false;
425 return ok;
/* Print usage information and terminate the process with STATUS.
   When STATUS is not EXIT_SUCCESS, only a one-line hint pointing at
   --help is written to stderr.  Otherwise the synopsis, the
   description and the option summary are written to stdout, followed
   by whatever emit_ancillary_info adds.
   NOTE(review): this copy of the function lacks its brace-only lines,
   the line closing the first printf format argument, and apparently a
   blank `\n\' help line; spacing inside the literals looks collapsed
   too.  Compare the message text against a pristine copy before
   relying on it.  */
428 void
429 usage (int status)
431 if (status != EXIT_SUCCESS)
432 fprintf (stderr, _("Try `%s --help' for more information.\n"),
433 program_name);
434 else
436 printf (_("\
437 Usage: %s [OPTION]... [FILE]...\n\
439 program_name);
440 fputs (_("\
441 Write lines consisting of the sequentially corresponding lines from\n\
442 each FILE, separated by TABs, to standard output.\n\
443 With no FILE, or when FILE is -, read standard input.\n\
445 "), stdout);
446 fputs (_("\
447 Mandatory arguments to long options are mandatory for short options too.\n\
448 "), stdout);
449 fputs (_("\
450 -d, --delimiters=LIST reuse characters from LIST instead of TABs\n\
451 -s, --serial paste one file at a time instead of in parallel\n\
452 "), stdout);
453 fputs (HELP_OPTION_DESCRIPTION, stdout);
454 fputs (VERSION_OPTION_DESCRIPTION, stdout);
455 /* FIXME: add a couple of examples. */
456 emit_ancillary_info ();
458 exit (status);
462 main (int argc, char **argv)
464 int optc;
465 bool ok;
466 char const *delim_arg = "\t";
468 initialize_main (&argc, &argv);
469 set_program_name (argv[0]);
470 setlocale (LC_ALL, "");
471 bindtextdomain (PACKAGE, LOCALEDIR);
472 textdomain (PACKAGE);
474 atexit (close_stdout);
476 have_read_stdin = false;
477 serial_merge = false;
479 while ((optc = getopt_long (argc, argv, "d:s", longopts, NULL)) != -1)
481 switch (optc)
483 case 'd':
484 /* Delimiter character(s). */
485 delim_arg = (optarg[0] == '\0' ? "\\0" : optarg);
486 break;
488 case 's':
489 serial_merge = true;
490 break;
492 case_GETOPT_HELP_CHAR;
494 case_GETOPT_VERSION_CHAR (PROGRAM_NAME, AUTHORS);
496 default:
497 usage (EXIT_FAILURE);
501 if (optind == argc)
502 argv[argc++] = bad_cast ("-");
504 if (collapse_escapes (delim_arg))
506 /* Don't use the default quoting style, because that would double the
507 number of displayed backslashes, making the diagnostic look bogus. */
508 set_quoting_style (NULL, escape_quoting_style);
509 error (EXIT_FAILURE, 0,
510 _("delimiter list ends with an unescaped backslash: %s"),
511 quotearg_colon (delim_arg));
514 if (!serial_merge)
515 ok = paste_parallel (argc - optind, &argv[optind]);
516 else
517 ok = paste_serial (argc - optind, &argv[optind]);
519 free (delims);
521 if (have_read_stdin && fclose (stdin) == EOF)
522 error (EXIT_FAILURE, errno, "-");
523 exit (ok ? EXIT_SUCCESS : EXIT_FAILURE);