.
[coreutils.git] / src / tac.c
blobbcf352e7c7c3356e36a3c0f35bc114e63e9bb2db
1 /* tac - concatenate and print files in reverse
2 Copyright (C) 1988, 1989, 1990, 1991, 1995 Free Software Foundation, Inc.
4 This program is free software; you can redistribute it and/or modify
5 it under the terms of the GNU General Public License as published by
6 the Free Software Foundation; either version 2, or (at your option)
7 any later version.
9 This program is distributed in the hope that it will be useful,
10 but WITHOUT ANY WARRANTY; without even the implied warranty of
11 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 GNU General Public License for more details.
14 You should have received a copy of the GNU General Public License
15 along with this program; if not, write to the Free Software
16 Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. */
18 /* Written by Jay Lepreau (lepreau@cs.utah.edu).
19 GNU enhancements by David MacKenzie (djm@gnu.ai.mit.edu). */
21 /* Copy each FILE, or the standard input if none are given or when a
22 FILE name of "-" is encountered, to the standard output with the
23 order of the records reversed. The records are separated by
24 instances of a string, or a newline if none is given. By default, the
25 separator string is attached to the end of the record that it
26 follows in the file.
28 Options:
29 -b, --before The separator is attached to the beginning
30 of the record that it precedes in the file.
31 -r, --regex The separator is a regular expression.
32 -s, --separator=separator Use SEPARATOR as the record separator.
34 To reverse a file byte by byte, use (in bash, ksh, or sh):
35 tac -r -s '.\|
36 ' file */
38 #include <config.h>
40 #include <stdio.h>
41 #include <getopt.h>
42 #include <sys/types.h>
43 #include <signal.h>
44 #include <regex.h>
45 #include "system.h"
46 #include "version.h"
47 #include "error.h"
/* Fallback declarations of the allocators, used only when STDC_HEADERS
   is not defined. */
49 #ifndef STDC_HEADERS
50 char *malloc ();
51 char *realloc ();
52 #endif
/* Directory that save_stdin uses for the temporary copy of standard
   input when the TMPDIR environment variable is unset. */
54 #ifndef DEFAULT_TMPDIR
55 #define DEFAULT_TMPDIR "/tmp"
56 #endif
58 /* The number of bytes per atomic read. */
59 #define INITIAL_READSIZE 8192
61 /* The number of bytes per atomic write. */
62 #define WRITESIZE 8192
/* Used by save_stdin to turn the "tacXXXXXX" template into a file name. */
64 char *mktemp ();
/* I/O helpers that are not defined in the visible part of this file.
   Callers here treat a negative return value as failure and a positive
   return from safe_read as a byte count.
   NOTE(review): presumably they retry partial writes and interrupted
   reads -- confirm against their definitions. */
66 int full_write ();
67 int safe_read ();
69 /* The name this program was run with. */
70 char *program_name;
72 /* The string that separates the records of the file. */
73 static char *separator;
75 /* If nonzero, print `separator' along with the record preceding it
76 in the file; otherwise with the record following it. */
77 static int separator_ends_record;
79 /* 0 if `separator' is to be matched as a regular expression;
80 otherwise, the length of `separator', used as a sentinel to
81 stop the search. */
82 static int sentinel_length;
84 /* The length of a match with `separator'. If `sentinel_length' is 0,
85 `match_length' is computed every time a match succeeds;
86 otherwise, it is simply the length of `separator'. */
87 static int match_length;
89 /* The input buffer. */
/* main advances this pointer past a copy of `separator' (or past one
   byte in regexp mode), so the allocation starts before `buffer'. */
90 static char *buffer;
92 /* The number of bytes to read at once into `buffer'. */
/* `tac' doubles this when a pending record outgrows it, and lowers it to
   the number of bytes left when fewer remain before the start of the file. */
93 static unsigned read_size;
95 /* The size of `buffer'. This is read_size * 2 + sentinel_length + 2.
96 The extra 2 bytes allow `past_end' to have a value beyond the
97 end of `buffer' and `match_start' to run off the front of `buffer'. */
98 static unsigned buffer_size;
100 /* The compiled regular expression representing `separator'. */
101 static struct re_pattern_buffer compiled_separator;
103 /* The name of a temporary file containing a copy of pipe input. */
/* Set by save_stdin; removed by cleanup and by tac_stdin. */
104 static char *tempfile;
106 /* If nonzero, display usage information and exit. */
107 static int show_help;
109 /* If nonzero, print the version on standard output then exit. */
110 static int show_version;
112 static struct option const longopts[] =
114 {"before", no_argument, &separator_ends_record, 0},
115 {"regex", no_argument, &sentinel_length, 0},
116 {"separator", required_argument, NULL, 's'},
117 {"help", no_argument, &show_help, 1},
118 {"version", no_argument, &show_version, 1},
119 {NULL, 0, NULL, 0}
122 static void
123 usage (int status)
125 if (status != 0)
126 fprintf (stderr, _("Try `%s --help' for more information.\n"),
127 program_name);
128 else
130 printf (_("\
131 Usage: %s [OPTION]... [FILE]...\n\
133 program_name);
134 printf (_("\
135 Write each FILE to standard output, last line first.\n\
136 With no FILE, or when FILE is -, read standard input.\n\
138 -b, --before attach the separator before instead of after\n\
139 -r, --regex interpret the separator as a regular expression\n\
140 -s, --separator=STRING use STRING as the separator instead of newline\n\
141 --help display this help and exit\n\
142 --version output version information and exit\n\
143 "));
145 exit (status);
148 static void
149 cleanup (void)
151 unlink (tempfile);
/* Fatal-error exit path: delete the temporary file via cleanup, then
   terminate the program with status 1.  Does not return. */
static void
cleanup_fatal (void)
{
  cleanup ();

  exit (1);
}
161 static RETSIGTYPE
162 sighandler (int sig)
164 #ifdef SA_INTERRUPT
165 struct sigaction sigact;
167 sigact.sa_handler = SIG_DFL;
168 sigemptyset (&sigact.sa_mask);
169 sigact.sa_flags = 0;
170 sigaction (sig, &sigact, NULL);
171 #else /* !SA_INTERRUPT */
172 signal (sig, SIG_DFL);
173 #endif /* SA_INTERRUPT */
174 cleanup ();
175 kill (getpid (), sig);
/* Return a newly allocated block of N bytes.
   If the allocation fails, print a diagnostic and leave through
   cleanup_fatal instead of returning a null pointer to the caller. */

static char *
xmalloc (unsigned int n)
{
  char *mem = malloc (n);

  if (mem != 0)
    return mem;

  error (0, 0, _("virtual memory exhausted"));
  cleanup_fatal ();
  return mem;
}
/* Resize the block P to N bytes and return its (possibly moved) address.
   If the reallocation fails, print a diagnostic and leave through
   cleanup_fatal instead of returning a null pointer to the caller. */

static char *
xrealloc (char *p, unsigned int n)
{
  char *resized = realloc (p, n);

  if (resized != 0)
    return resized;

  error (0, 0, _("virtual memory exhausted"));
  cleanup_fatal ();
  return resized;
}
/* Write SIZE bytes from BUFFER to descriptor DESC using full_write.
   A negative result from full_write is fatal: report it with errno and
   leave through cleanup_fatal. */
static void
xwrite (int desc, const char *buffer, int size)
{
  if (full_write (desc, buffer, size) >= 0)
    return;

  error (0, errno, _("write error"));
  cleanup_fatal ();
}
218 /* Print the characters from START to PAST_END - 1.
219 If START is NULL, just flush the buffer. */
221 static void
222 output (const char *start, const char *past_end)
224 static char buffer[WRITESIZE];
225 static int bytes_in_buffer = 0;
226 int bytes_to_add = past_end - start;
227 int bytes_available = WRITESIZE - bytes_in_buffer;
229 if (start == 0)
231 xwrite (STDOUT_FILENO, buffer, bytes_in_buffer);
232 bytes_in_buffer = 0;
233 return;
236 /* Write out as many full buffers as possible. */
237 while (bytes_to_add >= bytes_available)
239 memcpy (buffer + bytes_in_buffer, start, bytes_available);
240 bytes_to_add -= bytes_available;
241 start += bytes_available;
242 xwrite (STDOUT_FILENO, buffer, WRITESIZE);
243 bytes_in_buffer = 0;
244 bytes_available = WRITESIZE;
247 memcpy (buffer + bytes_in_buffer, start, bytes_to_add);
248 bytes_in_buffer += bytes_to_add;
251 /* Print in reverse the file open on descriptor FD for reading FILE.
252 Return 0 if ok, 1 if an error occurs. */
254 static int
255 tac (int fd, const char *file)
257 /* Pointer to the location in `buffer' where the search for
258 the next separator will begin. */
259 char *match_start;
260 /* Pointer to one past the rightmost character in `buffer' that
261 has not been printed yet. */
262 char *past_end;
263 unsigned saved_record_size; /* Length of the record growing in `buffer'. */
264 off_t file_pos; /* Offset in the file of the next read. */
265 /* Nonzero if `output' has not been called yet for any file.
266 Only used when the separator is attached to the preceding record. */
267 int first_time = 1;
268 char first_char = *separator; /* Speed optimization, non-regexp. */
269 char *separator1 = separator + 1; /* Speed optimization, non-regexp. */
270 int match_length1 = match_length - 1; /* Speed optimization, non-regexp. */
271 struct re_registers regs;
273 /* Find the size of the input file. */
274 file_pos = lseek (fd, (off_t) 0, SEEK_END);
275 if (file_pos < 1)
276 return 0; /* It's an empty file. */
278 /* Arrange for the first read to lop off enough to leave the rest of the
279 file a multiple of `read_size'. Since `read_size' can change, this may
280 not always hold during the program run, but since it usually will, leave
281 it here for i/o efficiency (page/sector boundaries and all that).
282 Note: the efficiency gain has not been verified. */
283 saved_record_size = file_pos % read_size;
284 if (saved_record_size == 0)
285 saved_record_size = read_size;
286 file_pos -= saved_record_size;
287 /* `file_pos' now points to the start of the last (probably partial) block
288 in the input file. */
290 lseek (fd, file_pos, SEEK_SET);
291 if (safe_read (fd, buffer, saved_record_size) != saved_record_size)
293 error (0, errno, "%s", file);
294 return 1;
297 match_start = past_end = buffer + saved_record_size;
298 /* For non-regexp search, move past impossible positions for a match. */
299 if (sentinel_length)
300 match_start -= match_length1;
302 for (;;)
304 /* Search backward from `match_start' - 1 to `buffer' for a match
305 with `separator'; for speed, use strncmp if `separator' contains no
306 metacharacters.
307 If the match succeeds, set `match_start' to point to the start of
308 the match and `match_length' to the length of the match.
309 Otherwise, make `match_start' < `buffer'. */
310 if (sentinel_length == 0)
312 int i = match_start - buffer;
313 int ret;
315 ret = re_search (&compiled_separator, buffer, i, i - 1, -i, &regs);
316 if (ret == -1)
317 match_start = buffer - 1;
318 else if (ret == -2)
320 error (0, 0, _("error in regular expression search"));
321 cleanup_fatal ();
323 else
325 match_start = buffer + regs.start[0];
326 match_length = regs.end[0] - regs.start[0];
329 else
331 /* `match_length' is constant for non-regexp boundaries. */
332 while (*--match_start != first_char
333 || (match_length1 && strncmp (match_start + 1, separator1,
334 match_length1)))
335 /* Do nothing. */ ;
338 /* Check whether we backed off the front of `buffer' without finding
339 a match for `separator'. */
340 if (match_start < buffer)
342 if (file_pos == 0)
344 /* Hit the beginning of the file; print the remaining record. */
345 output (buffer, past_end);
346 return 0;
349 saved_record_size = past_end - buffer;
350 if (saved_record_size > read_size)
352 /* `buffer_size' is about twice `read_size', so since
353 we want to read in another `read_size' bytes before
354 the data already in `buffer', we need to increase
355 `buffer_size'. */
356 char *newbuffer;
357 int offset = sentinel_length ? sentinel_length : 1;
359 read_size *= 2;
360 buffer_size = read_size * 2 + sentinel_length + 2;
361 newbuffer = xrealloc (buffer - offset, buffer_size) + offset;
362 /* Adjust the pointers for the new buffer location. */
363 match_start += newbuffer - buffer;
364 past_end += newbuffer - buffer;
365 buffer = newbuffer;
368 /* Back up to the start of the next bufferfull of the file. */
369 if (file_pos >= read_size)
370 file_pos -= read_size;
371 else
373 read_size = file_pos;
374 file_pos = 0;
376 lseek (fd, file_pos, SEEK_SET);
378 /* Shift the pending record data right to make room for the new.
379 The source and destination regions probably overlap. */
380 memmove (buffer + read_size, buffer, saved_record_size);
381 past_end = buffer + read_size + saved_record_size;
382 /* For non-regexp searches, avoid unneccessary scanning. */
383 if (sentinel_length)
384 match_start = buffer + read_size;
385 else
386 match_start = past_end;
388 if (safe_read (fd, buffer, read_size) != read_size)
390 error (0, errno, "%s", file);
391 return 1;
394 else
396 /* Found a match of `separator'. */
397 if (separator_ends_record)
399 char *match_end = match_start + match_length;
401 /* If this match of `separator' isn't at the end of the
402 file, print the record. */
403 if (first_time == 0 || match_end != past_end)
404 output (match_end, past_end);
405 past_end = match_end;
406 first_time = 0;
408 else
410 output (match_start, past_end);
411 past_end = match_start;
413 match_start -= match_length - 1;
/* Open FILE read-only, print it in reverse with `tac', and close it.
   Return 0 if ok, 1 if an error occurs.  A failure to open or to close
   FILE is reported with errno and counts as an error. */

static int
tac_file (const char *file)
{
  int status;
  int desc = open (file, O_RDONLY);

  if (desc == -1)
    {
      error (0, errno, "%s", file);
      return 1;
    }

  status = tac (desc, file);

  if (close (desc) >= 0)
    return status;

  error (0, errno, "%s", file);
  return 1;
}
441 /* Make a copy of the standard input in `tempfile'. */
443 static void
444 save_stdin (void)
446 static char *template = NULL;
447 static char *tempdir;
448 int fd;
449 int bytes_read;
451 if (template == NULL)
453 tempdir = getenv ("TMPDIR");
454 if (tempdir == NULL)
455 tempdir = DEFAULT_TMPDIR;
456 template = xmalloc (strlen (tempdir) + 11);
458 sprintf (template, "%s/tacXXXXXX", tempdir);
459 tempfile = mktemp (template);
461 fd = creat (tempfile, 0600);
462 if (fd == -1)
464 error (0, errno, "%s", tempfile);
465 cleanup_fatal ();
467 while ((bytes_read = safe_read (0, buffer, read_size)) > 0)
468 if (full_write (fd, buffer, bytes_read) < 0)
470 error (0, errno, "%s", tempfile);
471 cleanup_fatal ();
473 if (close (fd) < 0)
475 error (0, errno, "%s", tempfile);
476 cleanup_fatal ();
478 if (bytes_read == -1)
480 error (0, errno, _("read error"));
481 cleanup_fatal ();
485 /* Print the standard input in reverse, saving it to temporary
486 file `tempfile' first if it is a pipe.
487 Return 0 if ok, 1 if an error occurs. */
489 static int
490 tac_stdin (void)
492 /* Previous values of signal handlers. */
493 RETSIGTYPE (*sigint) (), (*sighup) (), (*sigpipe) (), (*sigterm) ();
494 int errors;
495 struct stat stats;
496 #ifdef SA_INTERRUPT
497 struct sigaction oldact, newact;
498 #endif /* SA_INTERRUPT */
500 /* No tempfile is needed for "tac < file".
501 Use fstat instead of checking for errno == ESPIPE because
502 lseek doesn't work on some special files but doesn't return an
503 error, either. */
504 if (fstat (0, &stats))
506 error (0, errno, _("standard input"));
507 return 1;
509 if (S_ISREG (stats.st_mode))
510 return tac (0, _("standard input"));
512 #ifdef SA_INTERRUPT
513 newact.sa_handler = sighandler;
514 sigemptyset (&newact.sa_mask);
515 newact.sa_flags = 0;
517 sigaction (SIGINT, NULL, &oldact);
518 sigint = oldact.sa_handler;
519 if (sigint != SIG_IGN)
520 sigaction (SIGINT, &newact, NULL);
522 sigaction (SIGHUP, NULL, &oldact);
523 sighup = oldact.sa_handler;
524 if (sighup != SIG_IGN)
525 sigaction (SIGHUP, &newact, NULL);
527 sigaction (SIGPIPE, NULL, &oldact);
528 sigpipe = oldact.sa_handler;
529 if (sigpipe != SIG_IGN)
530 sigaction (SIGPIPE, &newact, NULL);
532 sigaction (SIGTERM, NULL, &oldact);
533 sigterm = oldact.sa_handler;
534 if (sigterm != SIG_IGN)
535 sigaction (SIGTERM, &newact, NULL);
536 #else /* !SA_INTERRUPT */
537 sigint = signal (SIGINT, SIG_IGN);
538 if (sigint != SIG_IGN)
539 signal (SIGINT, sighandler);
541 sighup = signal (SIGHUP, SIG_IGN);
542 if (sighup != SIG_IGN)
543 signal (SIGHUP, sighandler);
545 sigpipe = signal (SIGPIPE, SIG_IGN);
546 if (sigpipe != SIG_IGN)
547 signal (SIGPIPE, sighandler);
549 sigterm = signal (SIGTERM, SIG_IGN);
550 if (sigterm != SIG_IGN)
551 signal (SIGTERM, sighandler);
552 #endif /* SA_INTERRUPT */
554 save_stdin ();
556 errors = tac_file (tempfile);
558 unlink (tempfile);
560 #ifdef SA_INTERRUPT
561 newact.sa_handler = sigint;
562 sigaction (SIGINT, &newact, NULL);
563 newact.sa_handler = sighup;
564 sigaction (SIGHUP, &newact, NULL);
565 newact.sa_handler = sigterm;
566 sigaction (SIGTERM, &newact, NULL);
567 newact.sa_handler = sigpipe;
568 sigaction (SIGPIPE, &newact, NULL);
569 #else /* !SA_INTERRUPT */
570 signal (SIGINT, sigint);
571 signal (SIGHUP, sighup);
572 signal (SIGTERM, sigterm);
573 signal (SIGPIPE, sigpipe);
574 #endif /* SA_INTERRUPT */
576 return errors;
579 void
580 main (int argc, char **argv)
582 const char *error_message; /* Return value from re_compile_pattern. */
583 int optc, errors;
584 int have_read_stdin = 0;
586 program_name = argv[0];
587 errors = 0;
588 separator = "\n";
589 sentinel_length = 1;
590 separator_ends_record = 1;
592 while ((optc = getopt_long (argc, argv, "brs:", longopts, (int *) 0))
593 != EOF)
595 switch (optc)
597 case 0:
598 break;
599 case 'b':
600 separator_ends_record = 0;
601 break;
602 case 'r':
603 sentinel_length = 0;
604 break;
605 case 's':
606 separator = optarg;
607 if (*separator == 0)
608 error (1, 0, _("separator cannot be empty"));
609 break;
610 default:
611 usage (1);
615 if (show_version)
617 printf ("tac - %s\n", version_string);
618 exit (0);
621 if (show_help)
622 usage (0);
624 if (sentinel_length == 0)
626 compiled_separator.allocated = 100;
627 compiled_separator.buffer = (unsigned char *)
628 xmalloc (compiled_separator.allocated);
629 compiled_separator.fastmap = xmalloc (256);
630 compiled_separator.translate = 0;
631 error_message = re_compile_pattern (separator, strlen (separator),
632 &compiled_separator);
633 if (error_message)
634 error (1, 0, "%s", error_message);
636 else
637 match_length = sentinel_length = strlen (separator);
639 read_size = INITIAL_READSIZE;
640 /* A precaution that will probably never be needed. */
641 while (sentinel_length * 2 >= read_size)
642 read_size *= 2;
643 buffer_size = read_size * 2 + sentinel_length + 2;
644 buffer = xmalloc (buffer_size);
645 if (sentinel_length)
647 strcpy (buffer, separator);
648 buffer += sentinel_length;
650 else
651 ++buffer;
653 if (optind == argc)
655 have_read_stdin = 1;
656 errors = tac_stdin ();
658 else
659 for (; optind < argc; ++optind)
661 if (strcmp (argv[optind], "-") == 0)
663 have_read_stdin = 1;
664 errors |= tac_stdin ();
666 else
667 errors |= tac_file (argv[optind]);
670 /* Flush the output buffer. */
671 output ((char *) NULL, (char *) NULL);
673 if (have_read_stdin && close (0) < 0)
674 error (1, errno, "-");
675 if (close (1) < 0)
676 error (1, errno, _("write error"));
677 exit (errors);