build: merge in bootstrap changes from gnulib
[coreutils.git] / src / tac.c
blob01150a3e87b31d5f3e6ee627aa573e2d887408da
/* tac - concatenate and print files in reverse
   Copyright (C) 1988-1991, 1995-2006, 2008-2011 Free Software Foundation, Inc.

   This program is free software: you can redistribute it and/or modify
   it under the terms of the GNU General Public License as published by
   the Free Software Foundation, either version 3 of the License, or
   (at your option) any later version.

   This program is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
   GNU General Public License for more details.

   You should have received a copy of the GNU General Public License
   along with this program.  If not, see <http://www.gnu.org/licenses/>.  */

/* Written by Jay Lepreau (lepreau@cs.utah.edu).
   GNU enhancements by David MacKenzie (djm@gnu.ai.mit.edu). */

/* Copy each FILE, or the standard input if none are given or when a
   FILE name of "-" is encountered, to the standard output with the
   order of the records reversed.  The records are separated by
   instances of a string, or a newline if none is given.  By default, the
   separator string is attached to the end of the record that it
   follows in the file.

   Options:
   -b, --before                 The separator is attached to the beginning
                                of the record that it precedes in the file.
   -r, --regex                  The separator is a regular expression.
   -s, --separator=separator    Use SEPARATOR as the record separator.

   To reverse a file byte by byte, use (in bash, ksh, or sh):
tac -r -s '.\|
' file */
37 #include <config.h>
39 #include <stdio.h>
40 #include <getopt.h>
41 #include <sys/types.h>
42 #include "system.h"
44 #include <regex.h>
46 #include "error.h"
47 #include "filenamecat.h"
48 #include "quote.h"
49 #include "quotearg.h"
50 #include "safe-read.h"
51 #include "stdlib--.h"
52 #include "xfreopen.h"

/* Canonical program name, without any installation prefix such as `g'.  */
#define PROGRAM_NAME "tac"

/* Author list, in the form expected by case_GETOPT_VERSION_CHAR.  */
#define AUTHORS \
  proper_name ("Jay Lepreau"), \
  proper_name ("David MacKenzie")

#if defined __MSDOS__ || defined _WIN32
/* Set to non-zero on systems (e.g., Windows and DOS) where one cannot
   unlink a file that is still open and then keep writing to it, seek
   back to its start and reread it.  */
# define DONT_UNLINK_WHILE_OPEN 1
#endif


/* Directory used for the temporary file when $TMPDIR is not set.  */
#ifndef DEFAULT_TMPDIR
# define DEFAULT_TMPDIR "/tmp"
#endif

/* Initial number of bytes requested by each read.  */
#define INITIAL_READSIZE 8192

/* Number of bytes handed to each write of the output buffer.  */
#define WRITESIZE 8192
80 /* The string that separates the records of the file. */
81 static char const *separator;
83 /* True if we have ever read standard input. */
84 static bool have_read_stdin = false;
86 /* If true, print `separator' along with the record preceding it
87 in the file; otherwise with the record following it. */
88 static bool separator_ends_record;
90 /* 0 if `separator' is to be matched as a regular expression;
91 otherwise, the length of `separator', used as a sentinel to
92 stop the search. */
93 static size_t sentinel_length;
95 /* The length of a match with `separator'. If `sentinel_length' is 0,
96 `match_length' is computed every time a match succeeds;
97 otherwise, it is simply the length of `separator'. */
98 static size_t match_length;
100 /* The input buffer. */
101 static char *G_buffer;
103 /* The number of bytes to read at once into `buffer'. */
104 static size_t read_size;
106 /* The size of `buffer'. This is read_size * 2 + sentinel_length + 2.
107 The extra 2 bytes allow `past_end' to have a value beyond the
108 end of `G_buffer' and `match_start' to run off the front of `G_buffer'. */
109 static size_t G_buffer_size;
111 /* The compiled regular expression representing `separator'. */
112 static struct re_pattern_buffer compiled_separator;
113 static char compiled_separator_fastmap[UCHAR_MAX + 1];
114 static struct re_registers regs;
116 static struct option const longopts[] =
118 {"before", no_argument, NULL, 'b'},
119 {"regex", no_argument, NULL, 'r'},
120 {"separator", required_argument, NULL, 's'},
121 {GETOPT_HELP_OPTION_DECL},
122 {GETOPT_VERSION_OPTION_DECL},
123 {NULL, 0, NULL, 0}
126 void
127 usage (int status)
129 if (status != EXIT_SUCCESS)
130 fprintf (stderr, _("Try `%s --help' for more information.\n"),
131 program_name);
132 else
134 printf (_("\
135 Usage: %s [OPTION]... [FILE]...\n\
137 program_name);
138 fputs (_("\
139 Write each FILE to standard output, last line first.\n\
140 With no FILE, or when FILE is -, read standard input.\n\
142 "), stdout);
143 fputs (_("\
144 Mandatory arguments to long options are mandatory for short options too.\n\
145 "), stdout);
146 fputs (_("\
147 -b, --before attach the separator before instead of after\n\
148 -r, --regex interpret the separator as a regular expression\n\
149 -s, --separator=STRING use STRING as the separator instead of newline\n\
150 "), stdout);
151 fputs (HELP_OPTION_DESCRIPTION, stdout);
152 fputs (VERSION_OPTION_DESCRIPTION, stdout);
153 emit_ancillary_info ();
155 exit (status);
158 /* Print the characters from START to PAST_END - 1.
159 If START is NULL, just flush the buffer. */
161 static void
162 output (const char *start, const char *past_end)
164 static char buffer[WRITESIZE];
165 static size_t bytes_in_buffer = 0;
166 size_t bytes_to_add = past_end - start;
167 size_t bytes_available = WRITESIZE - bytes_in_buffer;
169 if (start == 0)
171 fwrite (buffer, 1, bytes_in_buffer, stdout);
172 bytes_in_buffer = 0;
173 return;
176 /* Write out as many full buffers as possible. */
177 while (bytes_to_add >= bytes_available)
179 memcpy (buffer + bytes_in_buffer, start, bytes_available);
180 bytes_to_add -= bytes_available;
181 start += bytes_available;
182 fwrite (buffer, 1, WRITESIZE, stdout);
183 bytes_in_buffer = 0;
184 bytes_available = WRITESIZE;
187 memcpy (buffer + bytes_in_buffer, start, bytes_to_add);
188 bytes_in_buffer += bytes_to_add;
191 /* Print in reverse the file open on descriptor FD for reading FILE.
192 Return true if successful. */
194 static bool
195 tac_seekable (int input_fd, const char *file)
197 /* Pointer to the location in `G_buffer' where the search for
198 the next separator will begin. */
199 char *match_start;
201 /* Pointer to one past the rightmost character in `G_buffer' that
202 has not been printed yet. */
203 char *past_end;
205 /* Length of the record growing in `G_buffer'. */
206 size_t saved_record_size;
208 /* Offset in the file of the next read. */
209 off_t file_pos;
211 /* True if `output' has not been called yet for any file.
212 Only used when the separator is attached to the preceding record. */
213 bool first_time = true;
214 char first_char = *separator; /* Speed optimization, non-regexp. */
215 char const *separator1 = separator + 1; /* Speed optimization, non-regexp. */
216 size_t match_length1 = match_length - 1; /* Speed optimization, non-regexp. */
218 /* Find the size of the input file. */
219 file_pos = lseek (input_fd, 0, SEEK_END);
220 if (file_pos < 1)
221 return true; /* It's an empty file. */
223 /* Arrange for the first read to lop off enough to leave the rest of the
224 file a multiple of `read_size'. Since `read_size' can change, this may
225 not always hold during the program run, but since it usually will, leave
226 it here for i/o efficiency (page/sector boundaries and all that).
227 Note: the efficiency gain has not been verified. */
228 saved_record_size = file_pos % read_size;
229 if (saved_record_size == 0)
230 saved_record_size = read_size;
231 file_pos -= saved_record_size;
232 /* `file_pos' now points to the start of the last (probably partial) block
233 in the input file. */
235 if (lseek (input_fd, file_pos, SEEK_SET) < 0)
236 error (0, errno, _("%s: seek failed"), quotearg_colon (file));
238 if (safe_read (input_fd, G_buffer, saved_record_size) != saved_record_size)
240 error (0, errno, _("%s: read error"), quotearg_colon (file));
241 return false;
244 match_start = past_end = G_buffer + saved_record_size;
245 /* For non-regexp search, move past impossible positions for a match. */
246 if (sentinel_length)
247 match_start -= match_length1;
249 while (true)
251 /* Search backward from `match_start' - 1 to `G_buffer' for a match
252 with `separator'; for speed, use strncmp if `separator' contains no
253 metacharacters.
254 If the match succeeds, set `match_start' to point to the start of
255 the match and `match_length' to the length of the match.
256 Otherwise, make `match_start' < `G_buffer'. */
257 if (sentinel_length == 0)
259 size_t i = match_start - G_buffer;
260 regoff_t ri = i;
261 regoff_t range = 1 - ri;
262 regoff_t ret;
264 if (1 < range)
265 error (EXIT_FAILURE, 0, _("record too large"));
267 if (range == 1
268 || ((ret = re_search (&compiled_separator, G_buffer,
269 i, i - 1, range, &regs))
270 == -1))
271 match_start = G_buffer - 1;
272 else if (ret == -2)
274 error (EXIT_FAILURE, 0,
275 _("error in regular expression search"));
277 else
279 match_start = G_buffer + regs.start[0];
280 match_length = regs.end[0] - regs.start[0];
283 else
285 /* `match_length' is constant for non-regexp boundaries. */
286 while (*--match_start != first_char
287 || (match_length1 && strncmp (match_start + 1, separator1,
288 match_length1)))
289 /* Do nothing. */ ;
292 /* Check whether we backed off the front of `G_buffer' without finding
293 a match for `separator'. */
294 if (match_start < G_buffer)
296 if (file_pos == 0)
298 /* Hit the beginning of the file; print the remaining record. */
299 output (G_buffer, past_end);
300 return true;
303 saved_record_size = past_end - G_buffer;
304 if (saved_record_size > read_size)
306 /* `G_buffer_size' is about twice `read_size', so since
307 we want to read in another `read_size' bytes before
308 the data already in `G_buffer', we need to increase
309 `G_buffer_size'. */
310 char *newbuffer;
311 size_t offset = sentinel_length ? sentinel_length : 1;
312 ptrdiff_t match_start_offset = match_start - G_buffer;
313 ptrdiff_t past_end_offset = past_end - G_buffer;
314 size_t old_G_buffer_size = G_buffer_size;
316 read_size *= 2;
317 G_buffer_size = read_size * 2 + sentinel_length + 2;
318 if (G_buffer_size < old_G_buffer_size)
319 xalloc_die ();
320 newbuffer = xrealloc (G_buffer - offset, G_buffer_size);
321 newbuffer += offset;
322 /* Adjust the pointers for the new buffer location. */
323 match_start = newbuffer + match_start_offset;
324 past_end = newbuffer + past_end_offset;
325 G_buffer = newbuffer;
328 /* Back up to the start of the next bufferfull of the file. */
329 if (file_pos >= read_size)
330 file_pos -= read_size;
331 else
333 read_size = file_pos;
334 file_pos = 0;
336 if (lseek (input_fd, file_pos, SEEK_SET) < 0)
337 error (0, errno, _("%s: seek failed"), quotearg_colon (file));
339 /* Shift the pending record data right to make room for the new.
340 The source and destination regions probably overlap. */
341 memmove (G_buffer + read_size, G_buffer, saved_record_size);
342 past_end = G_buffer + read_size + saved_record_size;
343 /* For non-regexp searches, avoid unneccessary scanning. */
344 if (sentinel_length)
345 match_start = G_buffer + read_size;
346 else
347 match_start = past_end;
349 if (safe_read (input_fd, G_buffer, read_size) != read_size)
351 error (0, errno, _("%s: read error"), quotearg_colon (file));
352 return false;
355 else
357 /* Found a match of `separator'. */
358 if (separator_ends_record)
360 char *match_end = match_start + match_length;
362 /* If this match of `separator' isn't at the end of the
363 file, print the record. */
364 if (!first_time || match_end != past_end)
365 output (match_end, past_end);
366 past_end = match_end;
367 first_time = false;
369 else
371 output (match_start, past_end);
372 past_end = match_start;
375 /* For non-regex matching, we can back up. */
376 if (sentinel_length > 0)
377 match_start -= match_length - 1;
382 #if DONT_UNLINK_WHILE_OPEN
384 /* FIXME-someday: remove all of this DONT_UNLINK_WHILE_OPEN junk.
385 Using atexit like this is wrong, since it can fail
386 when called e.g. 32 or more times.
387 But this isn't a big deal, since the code is used only on WOE/DOS
388 systems, and few people invoke tac on that many nonseekable files. */
390 static const char *file_to_remove;
391 static FILE *fp_to_close;
393 static void
394 unlink_tempfile (void)
396 fclose (fp_to_close);
397 unlink (file_to_remove);
400 static void
401 record_or_unlink_tempfile (char const *fn, FILE *fp)
403 if (!file_to_remove)
405 file_to_remove = fn;
406 fp_to_close = fp;
407 atexit (unlink_tempfile);
411 #else
413 static void
414 record_or_unlink_tempfile (char const *fn, FILE *fp ATTRIBUTE_UNUSED)
416 unlink (fn);
419 #endif
421 /* A wrapper around mkstemp that gives us both an open stream pointer,
422 FP, and the corresponding FILE_NAME. Always return the same FP/name
423 pair, rewinding/truncating it upon each reuse. */
424 static bool
425 temp_stream (FILE **fp, char **file_name)
427 static char *tempfile = NULL;
428 static FILE *tmp_fp;
429 if (tempfile == NULL)
431 char const *t = getenv ("TMPDIR");
432 char const *tempdir = t ? t : DEFAULT_TMPDIR;
433 tempfile = mfile_name_concat (tempdir, "tacXXXXXX", NULL);
434 if (tempdir == NULL)
436 error (0, 0, _("memory exhausted"));
437 return false;
440 /* FIXME: there's a small window between a successful mkstemp call
441 and the unlink that's performed by record_or_unlink_tempfile.
442 If we're interrupted in that interval, this code fails to remove
443 the temporary file. On systems that define DONT_UNLINK_WHILE_OPEN,
444 the window is much larger -- it extends to the atexit-called
445 unlink_tempfile.
446 FIXME: clean up upon fatal signal. Don't block them, in case
447 $TMPFILE is a remote file system. */
449 int fd = mkstemp (tempfile);
450 if (fd < 0)
452 error (0, errno, _("failed to create temporary file in %s"),
453 quote (tempdir));
454 goto Reset;
457 tmp_fp = fdopen (fd, (O_BINARY ? "w+b" : "w+"));
458 if (! tmp_fp)
460 error (0, errno, _("failed to open %s for writing"),
461 quote (tempfile));
462 close (fd);
463 unlink (tempfile);
464 Reset:
465 free (tempfile);
466 tempfile = NULL;
467 return false;
470 record_or_unlink_tempfile (tempfile, tmp_fp);
472 else
474 if (fseek (tmp_fp, 0, SEEK_SET) < 0
475 || ftruncate (fileno (tmp_fp), 0) < 0)
477 error (0, errno, _("failed to rewind stream for %s"),
478 quote (tempfile));
479 return false;
483 *fp = tmp_fp;
484 *file_name = tempfile;
485 return true;
488 /* Copy from file descriptor INPUT_FD (corresponding to the named FILE) to
489 a temporary file, and set *G_TMP and *G_TEMPFILE to the resulting stream
490 and file name. Return true if successful. */
492 static bool
493 copy_to_temp (FILE **g_tmp, char **g_tempfile, int input_fd, char const *file)
495 FILE *fp;
496 char *file_name;
497 if (!temp_stream (&fp, &file_name))
498 return false;
500 while (1)
502 size_t bytes_read = safe_read (input_fd, G_buffer, read_size);
503 if (bytes_read == 0)
504 break;
505 if (bytes_read == SAFE_READ_ERROR)
507 error (0, errno, _("%s: read error"), quotearg_colon (file));
508 goto Fail;
511 if (fwrite (G_buffer, 1, bytes_read, fp) != bytes_read)
513 error (0, errno, _("%s: write error"), quotearg_colon (file_name));
514 goto Fail;
518 if (fflush (fp) != 0)
520 error (0, errno, _("%s: write error"), quotearg_colon (file_name));
521 goto Fail;
524 *g_tmp = fp;
525 *g_tempfile = file_name;
526 return true;
528 Fail:
529 fclose (fp);
530 return false;

/* Spool INPUT_FD (named FILE in diagnostics) into a temporary file,
   then reverse that temporary file with tac_seekable.
   Return true if both steps succeed.  */

static bool
tac_nonseekable (int input_fd, const char *file)
{
  FILE *spool;
  char *spool_name;

  return (copy_to_temp (&spool, &spool_name, input_fd, file)
          && tac_seekable (fileno (spool), spool_name));
}
548 /* Print FILE in reverse, copying it to a temporary
549 file first if it is not seekable.
550 Return true if successful. */
552 static bool
553 tac_file (const char *filename)
555 bool ok;
556 off_t file_size;
557 int fd;
558 bool is_stdin = STREQ (filename, "-");
560 if (is_stdin)
562 have_read_stdin = true;
563 fd = STDIN_FILENO;
564 filename = _("standard input");
565 if (O_BINARY && ! isatty (STDIN_FILENO))
566 xfreopen (NULL, "rb", stdin);
568 else
570 fd = open (filename, O_RDONLY | O_BINARY);
571 if (fd < 0)
573 error (0, errno, _("failed to open %s for reading"),
574 quote (filename));
575 return false;
579 file_size = lseek (fd, 0, SEEK_END);
581 ok = (file_size < 0 || isatty (fd)
582 ? tac_nonseekable (fd, filename)
583 : tac_seekable (fd, filename));
585 if (!is_stdin && close (fd) != 0)
587 error (0, errno, _("%s: read error"), quotearg_colon (filename));
588 ok = false;
590 return ok;
594 main (int argc, char **argv)
596 const char *error_message; /* Return value from re_compile_pattern. */
597 int optc;
598 bool ok;
599 size_t half_buffer_size;
601 /* Initializer for file_list if no file-arguments
602 were specified on the command line. */
603 static char const *const default_file_list[] = {"-", NULL};
604 char const *const *file;
606 initialize_main (&argc, &argv);
607 set_program_name (argv[0]);
608 setlocale (LC_ALL, "");
609 bindtextdomain (PACKAGE, LOCALEDIR);
610 textdomain (PACKAGE);
612 atexit (close_stdout);
614 separator = "\n";
615 sentinel_length = 1;
616 separator_ends_record = true;
618 while ((optc = getopt_long (argc, argv, "brs:", longopts, NULL)) != -1)
620 switch (optc)
622 case 'b':
623 separator_ends_record = false;
624 break;
625 case 'r':
626 sentinel_length = 0;
627 break;
628 case 's':
629 separator = optarg;
630 if (*separator == 0)
631 error (EXIT_FAILURE, 0, _("separator cannot be empty"));
632 break;
633 case_GETOPT_HELP_CHAR;
634 case_GETOPT_VERSION_CHAR (PROGRAM_NAME, AUTHORS);
635 default:
636 usage (EXIT_FAILURE);
640 if (sentinel_length == 0)
642 compiled_separator.buffer = NULL;
643 compiled_separator.allocated = 0;
644 compiled_separator.fastmap = compiled_separator_fastmap;
645 compiled_separator.translate = NULL;
646 error_message = re_compile_pattern (separator, strlen (separator),
647 &compiled_separator);
648 if (error_message)
649 error (EXIT_FAILURE, 0, "%s", error_message);
651 else
652 match_length = sentinel_length = strlen (separator);
654 read_size = INITIAL_READSIZE;
655 while (sentinel_length >= read_size / 2)
657 if (SIZE_MAX / 2 < read_size)
658 xalloc_die ();
659 read_size *= 2;
661 half_buffer_size = read_size + sentinel_length + 1;
662 G_buffer_size = 2 * half_buffer_size;
663 if (! (read_size < half_buffer_size && half_buffer_size < G_buffer_size))
664 xalloc_die ();
665 G_buffer = xmalloc (G_buffer_size);
666 if (sentinel_length)
668 strcpy (G_buffer, separator);
669 G_buffer += sentinel_length;
671 else
673 ++G_buffer;
676 file = (optind < argc
677 ? (char const *const *) &argv[optind]
678 : default_file_list);
680 if (O_BINARY && ! isatty (STDOUT_FILENO))
681 xfreopen (NULL, "wb", stdout);
684 size_t i;
685 ok = true;
686 for (i = 0; file[i]; ++i)
687 ok &= tac_file (file[i]);
690 /* Flush the output buffer. */
691 output ((char *) NULL, (char *) NULL);
693 if (have_read_stdin && close (STDIN_FILENO) < 0)
695 error (0, errno, "-");
696 ok = false;
699 #ifdef lint
700 size_t offset = sentinel_length ? sentinel_length : 1;
701 free (G_buffer - offset);
702 #endif
704 exit (ok ? EXIT_SUCCESS : EXIT_FAILURE);