.
[coreutils.git] / src / tac.c
blobcd0923a016e1d64344e760c40cb25ac3040e9bfb
1 /* tac - concatenate and print files in reverse
2 Copyright (C) 88, 89, 90, 91, 95, 1996 Free Software Foundation, Inc.
4 This program is free software; you can redistribute it and/or modify
5 it under the terms of the GNU General Public License as published by
6 the Free Software Foundation; either version 2, or (at your option)
7 any later version.
9 This program is distributed in the hope that it will be useful,
10 but WITHOUT ANY WARRANTY; without even the implied warranty of
11 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 GNU General Public License for more details.
14 You should have received a copy of the GNU General Public License
15 along with this program; if not, write to the Free Software
16 Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. */
18 /* Written by Jay Lepreau (lepreau@cs.utah.edu).
19 GNU enhancements by David MacKenzie (djm@gnu.ai.mit.edu). */
21 /* Copy each FILE, or the standard input if none are given or when a
22 FILE name of "-" is encountered, to the standard output with the
23 order of the records reversed. The records are separated by
24 instances of a string, or a newline if none is given. By default, the
25 separator string is attached to the end of the record that it
26 follows in the file.
28 Options:
29 -b, --before The separator is attached to the beginning
30 of the record that it precedes in the file.
31 -r, --regex The separator is a regular expression.
32 -s, --separator=separator Use SEPARATOR as the record separator.
34 To reverse a file byte by byte, use (in bash, ksh, or sh):
35 tac -r -s '.\|
36 ' file */
38 #include <config.h>
40 #include <stdio.h>
41 #include <getopt.h>
42 #include <sys/types.h>
43 #include <signal.h>
44 #if WITH_REGEX
45 # include <regex.h>
46 #else
47 # include <rx.h>
48 #endif
49 #include "system.h"
50 #include "error.h"
52 #ifndef STDC_HEADERS
53 char *malloc ();
54 char *realloc ();
55 #endif
57 #ifndef DEFAULT_TMPDIR
58 #define DEFAULT_TMPDIR "/tmp"
59 #endif
61 /* The number of bytes per atomic read. */
62 #define INITIAL_READSIZE 8192
64 /* The number of bytes per atomic write. */
65 #define WRITESIZE 8192
67 char *mktemp ();
69 int full_write ();
70 int safe_read ();
72 /* The name this program was run with. */
73 char *program_name;
75 /* The string that separates the records of the file. */
76 static char *separator;
78 /* If nonzero, print `separator' along with the record preceding it
79 in the file; otherwise with the record following it. */
80 static int separator_ends_record;
82 /* 0 if `separator' is to be matched as a regular expression;
83 otherwise, the length of `separator', used as a sentinel to
84 stop the search. */
85 static int sentinel_length;
87 /* The length of a match with `separator'. If `sentinel_length' is 0,
88 `match_length' is computed every time a match succeeds;
89 otherwise, it is simply the length of `separator'. */
90 static int match_length;
92 /* The input buffer. */
93 static char *buffer;
95 /* The number of bytes to read at once into `buffer'. */
96 static unsigned read_size;
98 /* The size of `buffer'. This is read_size * 2 + sentinel_length + 2.
99 The extra 2 bytes allow `past_end' to have a value beyond the
100 end of `buffer' and `match_start' to run off the front of `buffer'. */
101 static unsigned buffer_size;
103 /* The compiled regular expression representing `separator'. */
104 static struct re_pattern_buffer compiled_separator;
106 /* The name of a temporary file containing a copy of pipe input. */
107 static char *tempfile;
109 /* If nonzero, display usage information and exit. */
110 static int show_help;
112 /* If nonzero, print the version on standard output then exit. */
113 static int show_version;
115 static struct option const longopts[] =
117 {"before", no_argument, &separator_ends_record, 0},
118 {"regex", no_argument, &sentinel_length, 0},
119 {"separator", required_argument, NULL, 's'},
120 {"help", no_argument, &show_help, 1},
121 {"version", no_argument, &show_version, 1},
122 {NULL, 0, NULL, 0}
125 static void
126 usage (int status)
128 if (status != 0)
129 fprintf (stderr, _("Try `%s --help' for more information.\n"),
130 program_name);
131 else
133 printf (_("\
134 Usage: %s [OPTION]... [FILE]...\n\
136 program_name);
137 printf (_("\
138 Write each FILE to standard output, last line first.\n\
139 With no FILE, or when FILE is -, read standard input.\n\
141 -b, --before attach the separator before instead of after\n\
142 -r, --regex interpret the separator as a regular expression\n\
143 -s, --separator=STRING use STRING as the separator instead of newline\n\
144 --help display this help and exit\n\
145 --version output version information and exit\n\
146 "));
148 exit (status == 0 ? EXIT_SUCCESS : EXIT_FAILURE);
151 static void
152 cleanup (void)
154 unlink (tempfile);
/* Clean up via `cleanup', then terminate the program with a failure
   status.  Does not return.  */

static void
cleanup_fatal (void)
{
  cleanup ();
  exit (EXIT_FAILURE);
}
164 static RETSIGTYPE
165 sighandler (int sig)
167 #ifdef SA_INTERRUPT
168 struct sigaction sigact;
170 sigact.sa_handler = SIG_DFL;
171 sigemptyset (&sigact.sa_mask);
172 sigact.sa_flags = 0;
173 sigaction (sig, &sigact, NULL);
174 #else /* !SA_INTERRUPT */
175 signal (sig, SIG_DFL);
176 #endif /* SA_INTERRUPT */
177 cleanup ();
178 kill (getpid (), sig);
/* Return a pointer to N bytes of newly allocated memory.  If the
   allocation fails, print a diagnostic and leave through
   `cleanup_fatal'.  */

static char *
xmalloc (unsigned int n)
{
  char *block = malloc (n);

  if (block != 0)
    return block;

  error (0, 0, _("virtual memory exhausted"));
  cleanup_fatal ();
  return block;
}
/* Resize the memory area P to N bytes and return its (possibly moved)
   address.  If the reallocation fails, print a diagnostic and leave
   through `cleanup_fatal'.  */

static char *
xrealloc (char *p, unsigned int n)
{
  char *resized = realloc (p, n);

  if (resized != 0)
    return resized;

  error (0, 0, _("virtual memory exhausted"));
  cleanup_fatal ();
  return resized;
}
/* Write SIZE bytes from BUFFER to descriptor DESC with full_write.
   If full_write reports failure, print a diagnostic and leave through
   `cleanup_fatal'.  */

static void
xwrite (int desc, const char *buffer, int size)
{
  if (full_write (desc, buffer, size) >= 0)
    return;

  error (0, errno, _("write error"));
  cleanup_fatal ();
}
221 /* Print the characters from START to PAST_END - 1.
222 If START is NULL, just flush the buffer. */
224 static void
225 output (const char *start, const char *past_end)
227 static char buffer[WRITESIZE];
228 static int bytes_in_buffer = 0;
229 int bytes_to_add = past_end - start;
230 int bytes_available = WRITESIZE - bytes_in_buffer;
232 if (start == 0)
234 xwrite (STDOUT_FILENO, buffer, bytes_in_buffer);
235 bytes_in_buffer = 0;
236 return;
239 /* Write out as many full buffers as possible. */
240 while (bytes_to_add >= bytes_available)
242 memcpy (buffer + bytes_in_buffer, start, bytes_available);
243 bytes_to_add -= bytes_available;
244 start += bytes_available;
245 xwrite (STDOUT_FILENO, buffer, WRITESIZE);
246 bytes_in_buffer = 0;
247 bytes_available = WRITESIZE;
250 memcpy (buffer + bytes_in_buffer, start, bytes_to_add);
251 bytes_in_buffer += bytes_to_add;
254 /* Print in reverse the file open on descriptor FD for reading FILE.
255 Return 0 if ok, 1 if an error occurs. */
257 static int
258 tac (int fd, const char *file)
260 /* Pointer to the location in `buffer' where the search for
261 the next separator will begin. */
262 char *match_start;
263 /* Pointer to one past the rightmost character in `buffer' that
264 has not been printed yet. */
265 char *past_end;
266 unsigned saved_record_size; /* Length of the record growing in `buffer'. */
267 off_t file_pos; /* Offset in the file of the next read. */
268 /* Nonzero if `output' has not been called yet for any file.
269 Only used when the separator is attached to the preceding record. */
270 int first_time = 1;
271 char first_char = *separator; /* Speed optimization, non-regexp. */
272 char *separator1 = separator + 1; /* Speed optimization, non-regexp. */
273 int match_length1 = match_length - 1; /* Speed optimization, non-regexp. */
274 struct re_registers regs;
276 /* Find the size of the input file. */
277 file_pos = lseek (fd, (off_t) 0, SEEK_END);
278 if (file_pos < 1)
279 return 0; /* It's an empty file. */
281 /* Arrange for the first read to lop off enough to leave the rest of the
282 file a multiple of `read_size'. Since `read_size' can change, this may
283 not always hold during the program run, but since it usually will, leave
284 it here for i/o efficiency (page/sector boundaries and all that).
285 Note: the efficiency gain has not been verified. */
286 saved_record_size = file_pos % read_size;
287 if (saved_record_size == 0)
288 saved_record_size = read_size;
289 file_pos -= saved_record_size;
290 /* `file_pos' now points to the start of the last (probably partial) block
291 in the input file. */
293 lseek (fd, file_pos, SEEK_SET);
294 if (safe_read (fd, buffer, saved_record_size) != saved_record_size)
296 error (0, errno, "%s", file);
297 return 1;
300 match_start = past_end = buffer + saved_record_size;
301 /* For non-regexp search, move past impossible positions for a match. */
302 if (sentinel_length)
303 match_start -= match_length1;
305 for (;;)
307 /* Search backward from `match_start' - 1 to `buffer' for a match
308 with `separator'; for speed, use strncmp if `separator' contains no
309 metacharacters.
310 If the match succeeds, set `match_start' to point to the start of
311 the match and `match_length' to the length of the match.
312 Otherwise, make `match_start' < `buffer'. */
313 if (sentinel_length == 0)
315 int i = match_start - buffer;
316 int ret;
318 ret = re_search (&compiled_separator, buffer, i, i - 1, -i, &regs);
319 if (ret == -1)
320 match_start = buffer - 1;
321 else if (ret == -2)
323 error (0, 0, _("error in regular expression search"));
324 cleanup_fatal ();
326 else
328 match_start = buffer + regs.start[0];
329 match_length = regs.end[0] - regs.start[0];
332 else
334 /* `match_length' is constant for non-regexp boundaries. */
335 while (*--match_start != first_char
336 || (match_length1 && strncmp (match_start + 1, separator1,
337 match_length1)))
338 /* Do nothing. */ ;
341 /* Check whether we backed off the front of `buffer' without finding
342 a match for `separator'. */
343 if (match_start < buffer)
345 if (file_pos == 0)
347 /* Hit the beginning of the file; print the remaining record. */
348 output (buffer, past_end);
349 return 0;
352 saved_record_size = past_end - buffer;
353 if (saved_record_size > read_size)
355 /* `buffer_size' is about twice `read_size', so since
356 we want to read in another `read_size' bytes before
357 the data already in `buffer', we need to increase
358 `buffer_size'. */
359 char *newbuffer;
360 int offset = sentinel_length ? sentinel_length : 1;
362 read_size *= 2;
363 buffer_size = read_size * 2 + sentinel_length + 2;
364 newbuffer = xrealloc (buffer - offset, buffer_size) + offset;
365 /* Adjust the pointers for the new buffer location. */
366 match_start += newbuffer - buffer;
367 past_end += newbuffer - buffer;
368 buffer = newbuffer;
371 /* Back up to the start of the next bufferfull of the file. */
372 if (file_pos >= read_size)
373 file_pos -= read_size;
374 else
376 read_size = file_pos;
377 file_pos = 0;
379 lseek (fd, file_pos, SEEK_SET);
381 /* Shift the pending record data right to make room for the new.
382 The source and destination regions probably overlap. */
383 memmove (buffer + read_size, buffer, saved_record_size);
384 past_end = buffer + read_size + saved_record_size;
385 /* For non-regexp searches, avoid unneccessary scanning. */
386 if (sentinel_length)
387 match_start = buffer + read_size;
388 else
389 match_start = past_end;
391 if (safe_read (fd, buffer, read_size) != read_size)
393 error (0, errno, "%s", file);
394 return 1;
397 else
399 /* Found a match of `separator'. */
400 if (separator_ends_record)
402 char *match_end = match_start + match_length;
404 /* If this match of `separator' isn't at the end of the
405 file, print the record. */
406 if (first_time == 0 || match_end != past_end)
407 output (match_end, past_end);
408 past_end = match_end;
409 first_time = 0;
411 else
413 output (match_start, past_end);
414 past_end = match_start;
416 match_start -= match_length - 1;
/* Open FILE read-only, print it in reverse with `tac', and close it.
   Return 0 if ok, 1 if the file cannot be opened or closed; otherwise
   return whatever `tac' returned.  */

static int
tac_file (const char *file)
{
  int status;
  int desc = open (file, O_RDONLY);

  if (desc == -1)
    {
      error (0, errno, "%s", file);
      return 1;
    }

  status = tac (desc, file);

  if (close (desc) >= 0)
    return status;

  error (0, errno, "%s", file);
  return 1;
}
444 /* Make a copy of the standard input in `tempfile'. */
446 static void
447 save_stdin (void)
449 static char *template = NULL;
450 static char *tempdir;
451 int fd;
452 int bytes_read;
454 if (template == NULL)
456 tempdir = getenv ("TMPDIR");
457 if (tempdir == NULL)
458 tempdir = DEFAULT_TMPDIR;
459 template = xmalloc (strlen (tempdir) + 11);
461 sprintf (template, "%s/tacXXXXXX", tempdir);
462 tempfile = mktemp (template);
464 fd = creat (tempfile, 0600);
465 if (fd == -1)
467 error (0, errno, "%s", tempfile);
468 cleanup_fatal ();
470 while ((bytes_read = safe_read (0, buffer, read_size)) > 0)
471 if (full_write (fd, buffer, bytes_read) < 0)
473 error (0, errno, "%s", tempfile);
474 cleanup_fatal ();
476 if (close (fd) < 0)
478 error (0, errno, "%s", tempfile);
479 cleanup_fatal ();
481 if (bytes_read == -1)
483 error (0, errno, _("read error"));
484 cleanup_fatal ();
488 /* Print the standard input in reverse, saving it to temporary
489 file `tempfile' first if it is a pipe.
490 Return 0 if ok, 1 if an error occurs. */
492 static int
493 tac_stdin (void)
495 /* Previous values of signal handlers. */
496 RETSIGTYPE (*sigint) (), (*sighup) (), (*sigpipe) (), (*sigterm) ();
497 int errors;
498 struct stat stats;
499 #ifdef SA_INTERRUPT
500 struct sigaction oldact, newact;
501 #endif /* SA_INTERRUPT */
503 /* No tempfile is needed for "tac < file".
504 Use fstat instead of checking for errno == ESPIPE because
505 lseek doesn't work on some special files but doesn't return an
506 error, either. */
507 if (fstat (0, &stats))
509 error (0, errno, _("standard input"));
510 return 1;
512 if (S_ISREG (stats.st_mode))
513 return tac (0, _("standard input"));
515 #ifdef SA_INTERRUPT
516 newact.sa_handler = sighandler;
517 sigemptyset (&newact.sa_mask);
518 newact.sa_flags = 0;
520 sigaction (SIGINT, NULL, &oldact);
521 sigint = oldact.sa_handler;
522 if (sigint != SIG_IGN)
523 sigaction (SIGINT, &newact, NULL);
525 sigaction (SIGHUP, NULL, &oldact);
526 sighup = oldact.sa_handler;
527 if (sighup != SIG_IGN)
528 sigaction (SIGHUP, &newact, NULL);
530 sigaction (SIGPIPE, NULL, &oldact);
531 sigpipe = oldact.sa_handler;
532 if (sigpipe != SIG_IGN)
533 sigaction (SIGPIPE, &newact, NULL);
535 sigaction (SIGTERM, NULL, &oldact);
536 sigterm = oldact.sa_handler;
537 if (sigterm != SIG_IGN)
538 sigaction (SIGTERM, &newact, NULL);
539 #else /* !SA_INTERRUPT */
540 sigint = signal (SIGINT, SIG_IGN);
541 if (sigint != SIG_IGN)
542 signal (SIGINT, sighandler);
544 sighup = signal (SIGHUP, SIG_IGN);
545 if (sighup != SIG_IGN)
546 signal (SIGHUP, sighandler);
548 sigpipe = signal (SIGPIPE, SIG_IGN);
549 if (sigpipe != SIG_IGN)
550 signal (SIGPIPE, sighandler);
552 sigterm = signal (SIGTERM, SIG_IGN);
553 if (sigterm != SIG_IGN)
554 signal (SIGTERM, sighandler);
555 #endif /* SA_INTERRUPT */
557 save_stdin ();
559 errors = tac_file (tempfile);
561 unlink (tempfile);
563 #ifdef SA_INTERRUPT
564 newact.sa_handler = sigint;
565 sigaction (SIGINT, &newact, NULL);
566 newact.sa_handler = sighup;
567 sigaction (SIGHUP, &newact, NULL);
568 newact.sa_handler = sigterm;
569 sigaction (SIGTERM, &newact, NULL);
570 newact.sa_handler = sigpipe;
571 sigaction (SIGPIPE, &newact, NULL);
572 #else /* !SA_INTERRUPT */
573 signal (SIGINT, sigint);
574 signal (SIGHUP, sighup);
575 signal (SIGTERM, sigterm);
576 signal (SIGPIPE, sigpipe);
577 #endif /* SA_INTERRUPT */
579 return errors;
583 main (int argc, char **argv)
585 const char *error_message; /* Return value from re_compile_pattern. */
586 int optc, errors;
587 int have_read_stdin = 0;
589 program_name = argv[0];
590 setlocale (LC_ALL, "");
591 bindtextdomain (PACKAGE, LOCALEDIR);
592 textdomain (PACKAGE);
594 errors = 0;
595 separator = "\n";
596 sentinel_length = 1;
597 separator_ends_record = 1;
599 while ((optc = getopt_long (argc, argv, "brs:", longopts, (int *) 0))
600 != EOF)
602 switch (optc)
604 case 0:
605 break;
606 case 'b':
607 separator_ends_record = 0;
608 break;
609 case 'r':
610 sentinel_length = 0;
611 break;
612 case 's':
613 separator = optarg;
614 if (*separator == 0)
615 error (EXIT_FAILURE, 0, _("separator cannot be empty"));
616 break;
617 default:
618 usage (1);
622 if (show_version)
624 printf ("tac - %s\n", PACKAGE_VERSION);
625 exit (EXIT_SUCCESS);
628 if (show_help)
629 usage (0);
631 if (sentinel_length == 0)
633 compiled_separator.allocated = 100;
634 compiled_separator.buffer = (unsigned char *)
635 xmalloc (compiled_separator.allocated);
636 compiled_separator.fastmap = xmalloc (256);
637 compiled_separator.translate = 0;
638 error_message = re_compile_pattern (separator, strlen (separator),
639 &compiled_separator);
640 if (error_message)
641 error (EXIT_FAILURE, 0, "%s", error_message);
643 else
644 match_length = sentinel_length = strlen (separator);
646 read_size = INITIAL_READSIZE;
647 /* A precaution that will probably never be needed. */
648 while (sentinel_length * 2 >= read_size)
649 read_size *= 2;
650 buffer_size = read_size * 2 + sentinel_length + 2;
651 buffer = xmalloc (buffer_size);
652 if (sentinel_length)
654 strcpy (buffer, separator);
655 buffer += sentinel_length;
657 else
658 ++buffer;
660 if (optind == argc)
662 have_read_stdin = 1;
663 errors = tac_stdin ();
665 else
666 for (; optind < argc; ++optind)
668 if (strcmp (argv[optind], "-") == 0)
670 have_read_stdin = 1;
671 errors |= tac_stdin ();
673 else
674 errors |= tac_file (argv[optind]);
677 /* Flush the output buffer. */
678 output ((char *) NULL, (char *) NULL);
680 if (have_read_stdin && close (0) < 0)
681 error (EXIT_FAILURE, errno, "-");
682 if (close (1) < 0)
683 error (EXIT_FAILURE, errno, _("write error"));
684 exit (errors == 0 ? EXIT_SUCCESS : EXIT_FAILURE);