*** empty log message ***
[coreutils.git] / src / tac.c
blob87b9ca7a371a1dba9f2cc234ab834799a51ac311
1 /* tac - concatenate and print files in reverse
2 Copyright (C) 1988-1991, 1995-1999 Free Software Foundation, Inc.
4 This program is free software; you can redistribute it and/or modify
5 it under the terms of the GNU General Public License as published by
6 the Free Software Foundation; either version 2, or (at your option)
7 any later version.
9 This program is distributed in the hope that it will be useful,
10 but WITHOUT ANY WARRANTY; without even the implied warranty of
11 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 GNU General Public License for more details.
14 You should have received a copy of the GNU General Public License
15 along with this program; if not, write to the Free Software Foundation,
16 Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. */
18 /* Written by Jay Lepreau (lepreau@cs.utah.edu).
19 GNU enhancements by David MacKenzie (djm@gnu.ai.mit.edu). */
21 /* Copy each FILE, or the standard input if none are given or when a
22 FILE name of "-" is encountered, to the standard output with the
23 order of the records reversed. The records are separated by
24 instances of a string, or a newline if none is given. By default, the
25 separator string is attached to the end of the record that it
26 follows in the file.
28 Options:
29 -b, --before The separator is attached to the beginning
30 of the record that it precedes in the file.
31 -r, --regex The separator is a regular expression.
32 -s, --separator=separator Use SEPARATOR as the record separator.
34 To reverse a file byte by byte, use (in bash, ksh, or sh):
35 tac -r -s '.\|
36 ' file */
38 #include <config.h>
40 #include <stdio.h>
41 #include <getopt.h>
42 #include <sys/types.h>
43 #include "system.h"
45 #include <regex.h>
47 #include "error.h"
48 #include "safe-read.h"
50 /* The official name of this program (e.g., no `g' prefix). */
51 #define PROGRAM_NAME "tac"
53 #define AUTHORS "Jay Lepreau and David MacKenzie"
55 #if defined __MSDOS__ || defined _WIN32
56 /* Define this to non-zero on systems for which the regular mechanism
57 (of unlinking an open file and expecting to be able to write, seek
58 back to the beginning, then reread it) doesn't work. E.g., on Windows
59 and DOS systems. */
60 # define DONT_UNLINK_WHILE_OPEN 1
61 #endif
64 #ifndef DEFAULT_TMPDIR
65 # define DEFAULT_TMPDIR "/tmp"
66 #endif
68 /* The number of bytes per atomic read. */
69 #define INITIAL_READSIZE 8192
71 /* The number of bytes per atomic write. */
72 #define WRITESIZE 8192
74 char *mktemp ();
76 /* The name this program was run with. */
77 char *program_name;
79 /* The string that separates the records of the file. */
80 static char *separator;
82 /* If nonzero, print `separator' along with the record preceding it
83 in the file; otherwise with the record following it. */
84 static int separator_ends_record;
86 /* 0 if `separator' is to be matched as a regular expression;
87 otherwise, the length of `separator', used as a sentinel to
88 stop the search. */
89 static int sentinel_length;
91 /* The length of a match with `separator'. If `sentinel_length' is 0,
92 `match_length' is computed every time a match succeeds;
93 otherwise, it is simply the length of `separator'. */
94 static int match_length;
96 /* The input buffer. */
97 static char *G_buffer;
99 /* The number of bytes to read at once into `buffer'. */
100 static size_t read_size;
102 /* The size of `buffer'. This is read_size * 2 + sentinel_length + 2.
103 The extra 2 bytes allow `past_end' to have a value beyond the
104 end of `G_buffer' and `match_start' to run off the front of `G_buffer'. */
105 static unsigned G_buffer_size;
107 /* The compiled regular expression representing `separator'. */
108 static struct re_pattern_buffer compiled_separator;
110 static struct option const longopts[] =
112 {"before", no_argument, NULL, 'b'},
113 {"regex", no_argument, NULL, 'r'},
114 {"separator", required_argument, NULL, 's'},
115 {GETOPT_HELP_OPTION_DECL},
116 {GETOPT_VERSION_OPTION_DECL},
117 {NULL, 0, NULL, 0}
120 void
121 usage (int status)
123 if (status != 0)
124 fprintf (stderr, _("Try `%s --help' for more information.\n"),
125 program_name);
126 else
128 printf (_("\
129 Usage: %s [OPTION]... [FILE]...\n\
131 program_name);
132 printf (_("\
133 Write each FILE to standard output, last line first.\n\
134 With no FILE, or when FILE is -, read standard input.\n\
136 -b, --before attach the separator before instead of after\n\
137 -r, --regex interpret the separator as a regular expression\n\
138 -s, --separator=STRING use STRING as the separator instead of newline\n\
139 --help display this help and exit\n\
140 --version output version information and exit\n\
141 "));
142 puts (_("\nReport bugs to <bug-textutils@gnu.org>."));
144 exit (status == 0 ? EXIT_SUCCESS : EXIT_FAILURE);
147 /* Print the characters from START to PAST_END - 1.
148 If START is NULL, just flush the buffer. */
150 static void
151 output (const char *start, const char *past_end)
153 static char buffer[WRITESIZE];
154 static int bytes_in_buffer = 0;
155 int bytes_to_add = past_end - start;
156 int bytes_available = WRITESIZE - bytes_in_buffer;
158 if (start == 0)
160 fwrite (buffer, 1, bytes_in_buffer, stdout);
161 bytes_in_buffer = 0;
162 return;
165 /* Write out as many full buffers as possible. */
166 while (bytes_to_add >= bytes_available)
168 memcpy (buffer + bytes_in_buffer, start, bytes_available);
169 bytes_to_add -= bytes_available;
170 start += bytes_available;
171 fwrite (buffer, 1, WRITESIZE, stdout);
172 bytes_in_buffer = 0;
173 bytes_available = WRITESIZE;
176 memcpy (buffer + bytes_in_buffer, start, bytes_to_add);
177 bytes_in_buffer += bytes_to_add;
180 /* Print in reverse the file open on descriptor FD for reading FILE.
181 Return 0 if ok, 1 if an error occurs. */
183 static int
184 tac_seekable (int input_fd, const char *file)
186 /* Pointer to the location in `G_buffer' where the search for
187 the next separator will begin. */
188 char *match_start;
190 /* Pointer to one past the rightmost character in `G_buffer' that
191 has not been printed yet. */
192 char *past_end;
194 /* Length of the record growing in `G_buffer'. */
195 size_t saved_record_size;
197 /* Offset in the file of the next read. */
198 off_t file_pos;
200 /* Nonzero if `output' has not been called yet for any file.
201 Only used when the separator is attached to the preceding record. */
202 int first_time = 1;
203 char first_char = *separator; /* Speed optimization, non-regexp. */
204 char *separator1 = separator + 1; /* Speed optimization, non-regexp. */
205 int match_length1 = match_length - 1; /* Speed optimization, non-regexp. */
206 struct re_registers regs;
208 /* Find the size of the input file. */
209 file_pos = lseek (input_fd, (off_t) 0, SEEK_END);
210 if (file_pos < 1)
211 return 0; /* It's an empty file. */
213 /* Arrange for the first read to lop off enough to leave the rest of the
214 file a multiple of `read_size'. Since `read_size' can change, this may
215 not always hold during the program run, but since it usually will, leave
216 it here for i/o efficiency (page/sector boundaries and all that).
217 Note: the efficiency gain has not been verified. */
218 saved_record_size = file_pos % read_size;
219 if (saved_record_size == 0)
220 saved_record_size = read_size;
221 file_pos -= saved_record_size;
222 /* `file_pos' now points to the start of the last (probably partial) block
223 in the input file. */
225 if (lseek (input_fd, file_pos, SEEK_SET) < 0)
226 error (0, errno, "%s: seek failed", file);
228 if (safe_read (input_fd, G_buffer, saved_record_size) != saved_record_size)
230 error (0, errno, "%s", file);
231 return 1;
234 match_start = past_end = G_buffer + saved_record_size;
235 /* For non-regexp search, move past impossible positions for a match. */
236 if (sentinel_length)
237 match_start -= match_length1;
239 for (;;)
241 /* Search backward from `match_start' - 1 to `G_buffer' for a match
242 with `separator'; for speed, use strncmp if `separator' contains no
243 metacharacters.
244 If the match succeeds, set `match_start' to point to the start of
245 the match and `match_length' to the length of the match.
246 Otherwise, make `match_start' < `G_buffer'. */
247 if (sentinel_length == 0)
249 int i = match_start - G_buffer;
250 int ret;
252 ret = re_search (&compiled_separator, G_buffer, i, i - 1, -i, &regs);
253 if (ret == -1)
254 match_start = G_buffer - 1;
255 else if (ret == -2)
257 error (EXIT_FAILURE, 0,
258 _("error in regular expression search"));
260 else
262 match_start = G_buffer + regs.start[0];
263 match_length = regs.end[0] - regs.start[0];
266 else
268 /* `match_length' is constant for non-regexp boundaries. */
269 while (*--match_start != first_char
270 || (match_length1 && strncmp (match_start + 1, separator1,
271 match_length1)))
272 /* Do nothing. */ ;
275 /* Check whether we backed off the front of `G_buffer' without finding
276 a match for `separator'. */
277 if (match_start < G_buffer)
279 if (file_pos == 0)
281 /* Hit the beginning of the file; print the remaining record. */
282 output (G_buffer, past_end);
283 return 0;
286 saved_record_size = past_end - G_buffer;
287 if (saved_record_size > read_size)
289 /* `G_buffer_size' is about twice `read_size', so since
290 we want to read in another `read_size' bytes before
291 the data already in `G_buffer', we need to increase
292 `G_buffer_size'. */
293 char *newbuffer;
294 int offset = sentinel_length ? sentinel_length : 1;
296 read_size *= 2;
297 G_buffer_size = read_size * 2 + sentinel_length + 2;
298 newbuffer = xrealloc (G_buffer - offset, G_buffer_size);
299 newbuffer += offset;
300 /* Adjust the pointers for the new buffer location. */
301 match_start += newbuffer - G_buffer;
302 past_end += newbuffer - G_buffer;
303 G_buffer = newbuffer;
306 /* Back up to the start of the next bufferfull of the file. */
307 if (file_pos >= read_size)
308 file_pos -= read_size;
309 else
311 read_size = file_pos;
312 file_pos = 0;
314 lseek (input_fd, file_pos, SEEK_SET);
316 /* Shift the pending record data right to make room for the new.
317 The source and destination regions probably overlap. */
318 memmove (G_buffer + read_size, G_buffer, saved_record_size);
319 past_end = G_buffer + read_size + saved_record_size;
320 /* For non-regexp searches, avoid unneccessary scanning. */
321 if (sentinel_length)
322 match_start = G_buffer + read_size;
323 else
324 match_start = past_end;
326 if (safe_read (input_fd, G_buffer, read_size) != read_size)
328 error (0, errno, "%s", file);
329 return 1;
332 else
334 /* Found a match of `separator'. */
335 if (separator_ends_record)
337 char *match_end = match_start + match_length;
339 /* If this match of `separator' isn't at the end of the
340 file, print the record. */
341 if (first_time == 0 || match_end != past_end)
342 output (match_end, past_end);
343 past_end = match_end;
344 first_time = 0;
346 else
348 output (match_start, past_end);
349 past_end = match_start;
352 /* For non-regex matching, we can back up. */
353 if (sentinel_length > 0)
354 match_start -= match_length - 1;
359 /* Print FILE in reverse.
360 Return 0 if ok, 1 if an error occurs. */
362 static int
363 tac_file (const char *file)
365 int errors;
366 FILE *in;
368 in = fopen (file, "r");
369 if (in == NULL)
371 error (0, errno, "%s", file);
372 return 1;
374 SET_BINARY (fileno (in));
375 errors = tac_seekable (fileno (in), file);
376 if (ferror (in) || fclose (in) == EOF)
378 error (0, errno, "%s", file);
379 return 1;
381 return errors;
384 #if DONT_UNLINK_WHILE_OPEN
386 static const char *file_to_remove;
387 static FILE *fp_to_close;
389 static void
390 unlink_tempfile (void)
392 fclose (fp_to_close);
393 unlink (file_to_remove);
396 static void
397 record_tempfile (const char *fn, FILE *fp)
399 if (!file_to_remove)
401 file_to_remove = fn;
402 fp_to_close = fp;
403 atexit (unlink_tempfile);
407 #endif
409 /* Make a copy of the standard input in `FIXME'. */
411 static void
412 save_stdin (FILE **g_tmp, char **g_tempfile)
414 static char *template = NULL;
415 static char *tempdir;
416 static char *tempfile;
417 FILE *tmp;
418 ssize_t bytes_read;
419 int fd;
421 if (template == NULL)
423 tempdir = getenv ("TMPDIR");
424 if (tempdir == NULL)
425 tempdir = DEFAULT_TMPDIR;
426 template = xmalloc (strlen (tempdir) + 11);
428 sprintf (template, "%s/tacXXXXXX", tempdir);
429 tempfile = mktemp (template);
431 /* Open temporary file exclusively, to foil a common
432 denial-of-service attack. */
433 fd = open (tempfile, O_RDWR | O_CREAT | O_TRUNC | O_EXCL, 0600);
434 if (fd == -1)
435 error (EXIT_FAILURE, errno, "%s", tempfile);
437 tmp = fdopen (fd, "w+");
438 if (tmp == NULL)
439 error (EXIT_FAILURE, errno, "%s", tempfile);
441 #if DONT_UNLINK_WHILE_OPEN
442 record_tempfile (tempfile, tmp);
443 #else
444 unlink (tempfile);
445 #endif
447 while (1)
449 bytes_read = safe_read (STDIN_FILENO, G_buffer, read_size);
450 if (bytes_read == 0)
451 break;
452 if (bytes_read < 0)
453 error (EXIT_FAILURE, errno, _("stdin: read error"));
455 /* Don't bother checking for failure inside the loop -- check after. */
456 fwrite (G_buffer, 1, bytes_read, tmp);
459 if (ferror (tmp) || fflush (tmp) == EOF)
460 error (EXIT_FAILURE, errno, "%s", tempfile);
462 rewind (tmp);
464 SET_BINARY (fileno (tmp));
465 *g_tmp = tmp;
466 *g_tempfile = tempfile;
469 /* Print the standard input in reverse, saving it to temporary
470 file first if it is a pipe.
471 Return 0 if ok, 1 if an error occurs. */
473 static int
474 tac_stdin (void)
476 int errors;
477 struct stat stats;
479 /* No tempfile is needed for "tac < file".
480 Use fstat instead of checking for errno == ESPIPE because
481 lseek doesn't work on some special files but doesn't return an
482 error, either. */
483 if (fstat (STDIN_FILENO, &stats))
485 error (0, errno, _("standard input"));
486 return 1;
489 if (S_ISREG (stats.st_mode))
491 errors = tac_seekable (fileno (stdin), _("standard input"));
493 else
495 FILE *tmp_stream;
496 char *tmp_file;
497 save_stdin (&tmp_stream, &tmp_file);
498 errors = tac_seekable (fileno (tmp_stream), tmp_file);
501 return errors;
504 #if 0
505 /* BUF_END points one byte past the end of the buffer to be searched. */
507 static void *
508 memrchr (const char *buf_start, const char *buf_end, int c)
510 const char *p = buf_end;
511 while (buf_start <= --p)
513 if (*(const unsigned char *) p == c)
514 return (void *) p;
516 return NULL;
519 /* FIXME: describe */
521 static int
522 tac_mem (const char *buf, size_t n_bytes, FILE *out)
524 const char *nl;
525 const char *bol;
527 if (n_bytes == 0)
528 return 0;
530 nl = memrchr (buf, buf + n_bytes, '\n');
531 bol = (nl == NULL ? buf : nl + 1);
533 /* If the last line of the input file has no terminating newline,
534 treat it as a special case. */
535 if (bol < buf + n_bytes)
537 /* Print out the line from bol to end of input. */
538 fwrite (bol, 1, (buf + n_bytes) - bol, out);
540 /* Add a newline here. Otherwise, the first and second lines
541 of output would appear to have been joined. */
542 fputc ('\n', out);
545 while ((nl = memrchr (buf, bol - 1, '\n')) != NULL)
547 /* Output the line (which includes a trailing newline)
548 from NL+1 to BOL-1. */
549 fwrite (nl + 1, 1, bol - (nl + 1), out);
551 bol = nl + 1;
554 /* If there's anything left, output the last line: BUF .. BOL-1.
555 When the first byte of the input is a newline, there is nothing
556 left to do here. */
557 if (buf < bol)
558 fwrite (buf, 1, bol - buf, out);
560 /* FIXME: this is work in progress.... */
561 return ferror (out);
564 /* FIXME: describe */
566 static int
567 tac_stdin_to_mem (void)
569 char *buf = NULL;
570 size_t bufsiz = 8 * BUFSIZ;
571 size_t delta = 8 * BUFSIZ;
572 size_t n_bytes = 0;
574 while (1)
576 ssize_t bytes_read;
577 if (buf == NULL)
578 buf = (char *) malloc (bufsiz);
579 else
580 buf = (char *) realloc (buf, bufsiz);
582 if (buf == NULL)
584 /* Free the buffer and fall back on the code that relies on a
585 temporary file. */
586 free (buf);
587 /* FIXME */
588 abort ();
590 bytes_read = safe_read (STDIN_FILENO, buf + n_bytes, bufsiz - n_bytes);
591 if (bytes_read == 0)
592 break;
593 if (bytes_read < 0)
594 error (EXIT_FAILURE, errno, _("stdin: read error"));
595 n_bytes += bytes_read;
597 bufsiz += delta;
600 tac_mem (buf, n_bytes, stdout);
602 return 0;
604 #endif
607 main (int argc, char **argv)
609 const char *error_message; /* Return value from re_compile_pattern. */
610 int optc, errors;
611 int have_read_stdin = 0;
613 program_name = argv[0];
614 setlocale (LC_ALL, "");
615 bindtextdomain (PACKAGE, LOCALEDIR);
616 textdomain (PACKAGE);
618 errors = 0;
619 separator = "\n";
620 sentinel_length = 1;
621 separator_ends_record = 1;
623 while ((optc = getopt_long (argc, argv, "brs:", longopts, NULL)) != -1)
625 switch (optc)
627 case 0:
628 break;
629 case 'b':
630 separator_ends_record = 0;
631 break;
632 case 'r':
633 sentinel_length = 0;
634 break;
635 case 's':
636 separator = optarg;
637 if (*separator == 0)
638 error (EXIT_FAILURE, 0, _("separator cannot be empty"));
639 break;
640 case_GETOPT_HELP_CHAR;
641 case_GETOPT_VERSION_CHAR (PROGRAM_NAME, AUTHORS);
642 default:
643 usage (1);
647 if (sentinel_length == 0)
649 compiled_separator.allocated = 100;
650 compiled_separator.buffer = (unsigned char *)
651 xmalloc (compiled_separator.allocated);
652 compiled_separator.fastmap = xmalloc (256);
653 compiled_separator.translate = 0;
654 error_message = re_compile_pattern (separator, strlen (separator),
655 &compiled_separator);
656 if (error_message)
657 error (EXIT_FAILURE, 0, "%s", error_message);
659 else
660 match_length = sentinel_length = strlen (separator);
662 read_size = INITIAL_READSIZE;
663 /* A precaution that will probably never be needed. */
664 while (sentinel_length * 2 >= read_size)
665 read_size *= 2;
666 G_buffer_size = read_size * 2 + sentinel_length + 2;
667 G_buffer = xmalloc (G_buffer_size);
668 if (sentinel_length)
670 strcpy (G_buffer, separator);
671 G_buffer += sentinel_length;
673 else
675 ++G_buffer;
678 if (optind == argc)
680 have_read_stdin = 1;
681 /* We need binary I/O, since `tac' relies
682 on `lseek' and byte counts. */
683 SET_BINARY2 (STDIN_FILENO, STDOUT_FILENO);
684 errors = tac_stdin ();
686 else
688 for (; optind < argc; ++optind)
690 if (STREQ (argv[optind], "-"))
692 have_read_stdin = 1;
693 SET_BINARY2 (STDIN_FILENO, STDOUT_FILENO);
694 errors |= tac_stdin ();
696 else
698 /* Binary output will leave the lines' ends (NL or
699 CR/LF) intact when the output is a disk file.
700 Writing a file with CR/LF pairs at end of lines in
701 text mode has no visible effect on console output,
702 since two CRs in a row are just like one CR. */
703 SET_BINARY (STDOUT_FILENO);
704 errors |= tac_file (argv[optind]);
709 /* Flush the output buffer. */
710 output ((char *) NULL, (char *) NULL);
712 if (have_read_stdin && close (0) < 0)
713 error (EXIT_FAILURE, errno, "-");
714 if (ferror (stdout) || fclose (stdout) == EOF)
715 error (EXIT_FAILURE, errno, _("write error"));
716 exit (errors == 0 ? EXIT_SUCCESS : EXIT_FAILURE);