.
[coreutils.git] / src / tac.c
blob6d4591723d626fbeb78cc87c29ad6519a1274f84
1 /* tac - concatenate and print files in reverse
2 Copyright (C) 1988, 1989, 1990, 1991, 1995 Free Software Foundation, Inc.
4 This program is free software; you can redistribute it and/or modify
5 it under the terms of the GNU General Public License as published by
6 the Free Software Foundation; either version 2, or (at your option)
7 any later version.
9 This program is distributed in the hope that it will be useful,
10 but WITHOUT ANY WARRANTY; without even the implied warranty of
11 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 GNU General Public License for more details.
14 You should have received a copy of the GNU General Public License
15 along with this program; if not, write to the Free Software
16 Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. */
18 /* Written by Jay Lepreau (lepreau@cs.utah.edu).
19 GNU enhancements by David MacKenzie (djm@gnu.ai.mit.edu). */
21 /* Copy each FILE, or the standard input if none are given or when a
22 FILE name of "-" is encountered, to the standard output with the
23 order of the records reversed. The records are separated by
24 instances of a string, or a newline if none is given. By default, the
25 separator string is attached to the end of the record that it
26 follows in the file.
28 Options:
29 -b, --before The separator is attached to the beginning
30 of the record that it precedes in the file.
31 -r, --regex The separator is a regular expression.
32 -s, --separator=separator Use SEPARATOR as the record separator.
34 To reverse a file byte by byte, use (in bash, ksh, or sh):
35 tac -r -s '.\|
36 ' file */
38 #include <config.h>
40 #include <stdio.h>
41 #include <getopt.h>
42 #include <sys/types.h>
43 #include <signal.h>
44 #include <regex.h>
45 #include "system.h"
46 #include "version.h"
47 #include "error.h"
49 #ifndef STDC_HEADERS
50 char *malloc ();
51 char *realloc ();
52 #endif
54 #ifndef DEFAULT_TMPDIR
55 #define DEFAULT_TMPDIR "/tmp"
56 #endif
58 /* The number of bytes per atomic read. */
59 #define INITIAL_READSIZE 8192
61 /* The number of bytes per atomic write. */
62 #define WRITESIZE 8192
64 char *mktemp ();
66 static RETSIGTYPE cleanup ();
67 static int tac ();
68 static int tac_file ();
69 static int tac_stdin ();
70 static char *xmalloc ();
71 static char *xrealloc ();
72 static void output ();
73 static void save_stdin ();
74 static void xwrite ();
76 int full_write ();
77 int safe_read ();
/* Name under which the program was invoked (argv[0]). */
char *program_name;

/* Option state. */

/* Set by --help: print the usage message and exit. */
static int show_help;

/* Set by --version: print the version on standard output and exit. */
static int show_version;

/* The record separator: a literal string, or the source text of a
   regular expression when -r is in effect. */
static char *separator;

/* Nonzero: a separator is printed with the record that precedes it in
   the file.  Zero (-b): it is printed with the record that follows. */
static int separator_ends_record;

/* Zero when `separator' is matched as a regular expression.  Otherwise
   the length of `separator'; a copy of the separator that long is kept
   just in front of `buffer' as a sentinel that stops the backward
   search. */
static int sentinel_length;

/* Length of the most recent separator match.  Recomputed after every
   successful match when `sentinel_length' is 0; otherwise fixed at the
   length of `separator'. */
static int match_length;

/* The compiled form of `separator', used only for regular expression
   matching. */
static struct re_pattern_buffer compiled_separator;

/* Input buffering. */

/* Where input data is read.  This points a little way into the
   allocated block, not at its start. */
static char *buffer;

/* How many bytes are requested from the input per read. */
static unsigned int read_size;

/* Bytes allocated for the block holding `buffer':
   read_size * 2 + sentinel_length + 2.  The 2 spare bytes let
   `past_end' point beyond the data and `match_start' step off the
   front. */
static unsigned int buffer_size;
119 static struct option const longopts[] =
121 {"before", no_argument, &separator_ends_record, 0},
122 {"regex", no_argument, &sentinel_length, 0},
123 {"separator", required_argument, NULL, 's'},
124 {"help", no_argument, &show_help, 1},
125 {"version", no_argument, &show_version, 1},
126 {NULL, 0, NULL, 0}
/* Tell the user how to invoke the program, then terminate with exit
   status STATUS.
   A nonzero STATUS writes a one-line pointer to --help on stderr;
   a zero STATUS writes the synopsis and the option summary on stdout. */
129 static void
130 usage (status)
131 int status;
/* Nonzero STATUS: only name the --help option, on stderr. */
133 if (status != 0)
134 fprintf (stderr, "Try `%s --help' for more information.\n",
135 program_name);
136 else
138 printf ("\
139 Usage: %s [OPTION]... [FILE]...\n\
141 program_name);
142 printf ("\
143 Write each FILE to standard output, last line first.\n\
144 With no FILE, or when FILE is -, read standard input.\n\
146 -b, --before attach the separator before instead of after\n\
147 -r, --regex interpret the separator as a regular expression\n\
148 -s, --separator=STRING use STRING as the separator instead of newline\n\
149 --help display this help and exit\n\
150 --version output version information and exit\n\
/* Reached after either message; the process ends here. */
153 exit (status);
156 void
157 main (argc, argv)
158 int argc;
159 char **argv;
161 const char *error_message; /* Return value from re_compile_pattern. */
162 int optc, errors;
163 int have_read_stdin = 0;
165 program_name = argv[0];
166 errors = 0;
167 separator = "\n";
168 sentinel_length = 1;
169 separator_ends_record = 1;
171 while ((optc = getopt_long (argc, argv, "brs:", longopts, (int *) 0))
172 != EOF)
174 switch (optc)
176 case 0:
177 break;
178 case 'b':
179 separator_ends_record = 0;
180 break;
181 case 'r':
182 sentinel_length = 0;
183 break;
184 case 's':
185 separator = optarg;
186 if (*separator == 0)
187 error (1, 0, "separator cannot be empty");
188 break;
189 default:
190 usage (1);
194 if (show_version)
196 printf ("tac - %s\n", version_string);
197 exit (0);
200 if (show_help)
201 usage (0);
203 if (sentinel_length == 0)
205 compiled_separator.allocated = 100;
206 compiled_separator.buffer = (unsigned char *)
207 xmalloc (compiled_separator.allocated);
208 compiled_separator.fastmap = xmalloc (256);
209 compiled_separator.translate = 0;
210 error_message = re_compile_pattern (separator, strlen (separator),
211 &compiled_separator);
212 if (error_message)
213 error (1, 0, "%s", error_message);
215 else
216 match_length = sentinel_length = strlen (separator);
218 read_size = INITIAL_READSIZE;
219 /* A precaution that will probably never be needed. */
220 while (sentinel_length * 2 >= read_size)
221 read_size *= 2;
222 buffer_size = read_size * 2 + sentinel_length + 2;
223 buffer = xmalloc (buffer_size);
224 if (sentinel_length)
226 strcpy (buffer, separator);
227 buffer += sentinel_length;
229 else
230 ++buffer;
232 if (optind == argc)
234 have_read_stdin = 1;
235 errors = tac_stdin ();
237 else
238 for (; optind < argc; ++optind)
240 if (strcmp (argv[optind], "-") == 0)
242 have_read_stdin = 1;
243 errors |= tac_stdin ();
245 else
246 errors |= tac_file (argv[optind]);
249 /* Flush the output buffer. */
250 output ((char *) NULL, (char *) NULL);
252 if (have_read_stdin && close (0) < 0)
253 error (1, errno, "-");
254 if (close (1) < 0)
255 error (1, errno, "write error");
256 exit (errors);
259 /* The name of a temporary file containing a copy of pipe input.
   Assigned by `save_stdin'; removed by `tac_stdin' once the copy has
   been printed, and by `cleanup' when the program stops early.
   It has no initializer, so it is a null pointer until `save_stdin'
   has run. */
260 char *tempfile;
262 /* Print the standard input in reverse, saving it to temporary
263 file `tempfile' first if it is a pipe.
264 Return 0 if ok, 1 if an error occurs. */
266 static int
267 tac_stdin ()
269 /* Previous values of signal handlers. */
270 RETSIGTYPE (*sigint) (), (*sighup) (), (*sigpipe) (), (*sigterm) ();
271 int errors;
272 struct stat stats;
273 #ifdef _POSIX_VERSION
274 struct sigaction oldact, newact;
275 #endif /* _POSIX_VERSION */
277 /* No tempfile is needed for "tac < file".
278 Use fstat instead of checking for errno == ESPIPE because
279 lseek doesn't work on some special files but doesn't return an
280 error, either. */
281 if (fstat (0, &stats))
283 error (0, errno, "standard input");
284 return 1;
286 if (S_ISREG (stats.st_mode))
287 return tac (0, "standard input");
289 #ifdef _POSIX_VERSION
290 newact.sa_handler = cleanup;
291 sigemptyset (&newact.sa_mask);
292 newact.sa_flags = 0;
294 sigaction (SIGINT, NULL, &oldact);
295 sigint = oldact.sa_handler;
296 if (sigint != SIG_IGN)
297 sigaction (SIGINT, &newact, NULL);
299 sigaction (SIGHUP, NULL, &oldact);
300 sighup = oldact.sa_handler;
301 if (sighup != SIG_IGN)
302 sigaction (SIGHUP, &newact, NULL);
304 sigaction (SIGPIPE, NULL, &oldact);
305 sigpipe = oldact.sa_handler;
306 if (sigpipe != SIG_IGN)
307 sigaction (SIGPIPE, &newact, NULL);
309 sigaction (SIGTERM, NULL, &oldact);
310 sigterm = oldact.sa_handler;
311 if (sigterm != SIG_IGN)
312 sigaction (SIGTERM, &newact, NULL);
313 #else /* !_POSIX_VERSION */
314 sigint = signal (SIGINT, SIG_IGN);
315 if (sigint != SIG_IGN)
316 signal (SIGINT, cleanup);
318 sighup = signal (SIGHUP, SIG_IGN);
319 if (sighup != SIG_IGN)
320 signal (SIGHUP, cleanup);
322 sigpipe = signal (SIGPIPE, SIG_IGN);
323 if (sigpipe != SIG_IGN)
324 signal (SIGPIPE, cleanup);
326 sigterm = signal (SIGTERM, SIG_IGN);
327 if (sigterm != SIG_IGN)
328 signal (SIGTERM, cleanup);
329 #endif /* _POSIX_VERSION */
331 save_stdin ();
333 errors = tac_file (tempfile);
335 unlink (tempfile);
337 #ifdef _POSIX_VERSION
338 newact.sa_handler = sigint;
339 sigaction (SIGINT, &newact, NULL);
340 newact.sa_handler = sighup;
341 sigaction (SIGHUP, &newact, NULL);
342 newact.sa_handler = sigterm;
343 sigaction (SIGTERM, &newact, NULL);
344 newact.sa_handler = sigpipe;
345 sigaction (SIGPIPE, &newact, NULL);
346 #else /* !_POSIX_VERSION */
347 signal (SIGINT, sigint);
348 signal (SIGHUP, sighup);
349 signal (SIGTERM, sigterm);
350 signal (SIGPIPE, sigpipe);
351 #endif /* _POSIX_VERSION */
353 return errors;
356 /* Make a copy of the standard input in `tempfile'. */
358 static void
359 save_stdin ()
361 static char *template = NULL;
362 static char *tempdir;
363 int fd;
364 int bytes_read;
366 if (template == NULL)
368 tempdir = getenv ("TMPDIR");
369 if (tempdir == NULL)
370 tempdir = DEFAULT_TMPDIR;
371 template = xmalloc (strlen (tempdir) + 11);
373 sprintf (template, "%s/tacXXXXXX", tempdir);
374 tempfile = mktemp (template);
376 fd = creat (tempfile, 0600);
377 if (fd == -1)
379 error (0, errno, "%s", tempfile);
380 cleanup ();
382 while ((bytes_read = safe_read (0, buffer, read_size)) > 0)
383 if (full_write (fd, buffer, bytes_read) < 0)
385 error (0, errno, "%s", tempfile);
386 cleanup ();
388 if (close (fd) < 0)
390 error (0, errno, "%s", tempfile);
391 cleanup ();
393 if (bytes_read == -1)
395 error (0, errno, "read error");
396 cleanup ();
/* Reverse the file named FILE onto standard output.
   FILE is opened read-only, passed to `tac', and closed again.
   Return 0 if ok, 1 if the file cannot be opened or closed; otherwise
   return whatever `tac' returned. */

static int
tac_file (file)
     char *file;
{
  int status;
  int fd = open (file, O_RDONLY);

  if (fd == -1)
    {
      error (0, errno, "%s", file);
      return 1;
    }

  status = tac (fd, file);

  /* A successful close leaves the verdict of `tac' untouched. */
  if (close (fd) >= 0)
    return status;

  error (0, errno, "%s", file);
  return 1;
}
424 /* Print in reverse the file open on descriptor FD for reading FILE.
425 Return 0 if ok, 1 if an error occurs. */
427 static int
428 tac (fd, file)
429 int fd;
430 char *file;
432 /* Pointer to the location in `buffer' where the search for
433 the next separator will begin. */
434 char *match_start;
435 /* Pointer to one past the rightmost character in `buffer' that
436 has not been printed yet. */
437 char *past_end;
438 unsigned saved_record_size; /* Length of the record growing in `buffer'. */
439 off_t file_pos; /* Offset in the file of the next read. */
440 /* Nonzero if `output' has not been called yet for any file.
441 Only used when the separator is attached to the preceding record. */
442 int first_time = 1;
443 char first_char = *separator; /* Speed optimization, non-regexp. */
444 char *separator1 = separator + 1; /* Speed optimization, non-regexp. */
445 int match_length1 = match_length - 1; /* Speed optimization, non-regexp. */
446 struct re_registers regs;
448 /* Find the size of the input file. */
449 file_pos = lseek (fd, (off_t) 0, SEEK_END);
450 if (file_pos < 1)
451 return 0; /* It's an empty file. */
453 /* Arrange for the first read to lop off enough to leave the rest of the
454 file a multiple of `read_size'. Since `read_size' can change, this may
455 not always hold during the program run, but since it usually will, leave
456 it here for i/o efficiency (page/sector boundaries and all that).
457 Note: the efficiency gain has not been verified. */
458 saved_record_size = file_pos % read_size;
459 if (saved_record_size == 0)
460 saved_record_size = read_size;
461 file_pos -= saved_record_size;
462 /* `file_pos' now points to the start of the last (probably partial) block
463 in the input file. */
465 lseek (fd, file_pos, SEEK_SET);
466 if (safe_read (fd, buffer, saved_record_size) != saved_record_size)
468 error (0, errno, "%s", file);
469 return 1;
472 match_start = past_end = buffer + saved_record_size;
473 /* For non-regexp search, move past impossible positions for a match. */
474 if (sentinel_length)
475 match_start -= match_length1;
477 for (;;)
479 /* Search backward from `match_start' - 1 to `buffer' for a match
480 with `separator'; for speed, use strncmp if `separator' contains no
481 metacharacters.
482 If the match succeeds, set `match_start' to point to the start of
483 the match and `match_length' to the length of the match.
484 Otherwise, make `match_start' < `buffer'. */
485 if (sentinel_length == 0)
487 int i = match_start - buffer;
488 int ret;
490 ret = re_search (&compiled_separator, buffer, i, i - 1, -i, &regs);
491 if (ret == -1)
492 match_start = buffer - 1;
493 else if (ret == -2)
495 error (0, 0, "error in regular expression search");
496 cleanup ();
498 else
500 match_start = buffer + regs.start[0];
501 match_length = regs.end[0] - regs.start[0];
504 else
506 /* `match_length' is constant for non-regexp boundaries. */
507 while (*--match_start != first_char
508 || (match_length1 && strncmp (match_start + 1, separator1,
509 match_length1)))
510 /* Do nothing. */ ;
513 /* Check whether we backed off the front of `buffer' without finding
514 a match for `separator'. */
515 if (match_start < buffer)
517 if (file_pos == 0)
519 /* Hit the beginning of the file; print the remaining record. */
520 output (buffer, past_end);
521 return 0;
524 saved_record_size = past_end - buffer;
525 if (saved_record_size > read_size)
527 /* `buffer_size' is about twice `read_size', so since
528 we want to read in another `read_size' bytes before
529 the data already in `buffer', we need to increase
530 `buffer_size'. */
531 char *newbuffer;
532 int offset = sentinel_length ? sentinel_length : 1;
534 read_size *= 2;
535 buffer_size = read_size * 2 + sentinel_length + 2;
536 newbuffer = xrealloc (buffer - offset, buffer_size) + offset;
537 /* Adjust the pointers for the new buffer location. */
538 match_start += newbuffer - buffer;
539 past_end += newbuffer - buffer;
540 buffer = newbuffer;
543 /* Back up to the start of the next bufferfull of the file. */
544 if (file_pos >= read_size)
545 file_pos -= read_size;
546 else
548 read_size = file_pos;
549 file_pos = 0;
551 lseek (fd, file_pos, SEEK_SET);
553 /* Shift the pending record data right to make room for the new.
554 The source and destination regions probably overlap. */
555 memmove (buffer + read_size, buffer, saved_record_size);
556 past_end = buffer + read_size + saved_record_size;
557 /* For non-regexp searches, avoid unneccessary scanning. */
558 if (sentinel_length)
559 match_start = buffer + read_size;
560 else
561 match_start = past_end;
563 if (safe_read (fd, buffer, read_size) != read_size)
565 error (0, errno, "%s", file);
566 return 1;
569 else
571 /* Found a match of `separator'. */
572 if (separator_ends_record)
574 char *match_end = match_start + match_length;
576 /* If this match of `separator' isn't at the end of the
577 file, print the record. */
578 if (first_time == 0 || match_end != past_end)
579 output (match_end, past_end);
580 past_end = match_end;
581 first_time = 0;
583 else
585 output (match_start, past_end);
586 past_end = match_start;
588 match_start -= match_length - 1;
593 /* Print the characters from START to PAST_END - 1.
594 If START is NULL, just flush the buffer. */
596 static void
597 output (start, past_end)
598 char *start;
599 char *past_end;
601 static char buffer[WRITESIZE];
602 static int bytes_in_buffer = 0;
603 int bytes_to_add = past_end - start;
604 int bytes_available = WRITESIZE - bytes_in_buffer;
606 if (start == 0)
608 xwrite (STDOUT_FILENO, buffer, bytes_in_buffer);
609 bytes_in_buffer = 0;
610 return;
613 /* Write out as many full buffers as possible. */
614 while (bytes_to_add >= bytes_available)
616 memcpy (buffer + bytes_in_buffer, start, bytes_available);
617 bytes_to_add -= bytes_available;
618 start += bytes_available;
619 xwrite (STDOUT_FILENO, buffer, WRITESIZE);
620 bytes_in_buffer = 0;
621 bytes_available = WRITESIZE;
624 memcpy (buffer + bytes_in_buffer, start, bytes_to_add);
625 bytes_in_buffer += bytes_to_add;
628 static RETSIGTYPE
629 cleanup ()
631 unlink (tempfile);
632 exit (1);
/* Write SIZE bytes from BUFFER to descriptor DESC with `full_write'.
   If that reports a failure, print a diagnostic and call `cleanup'. */

static void
xwrite (desc, buffer, size)
     int desc;
     char *buffer;
     int size;
{
  if (full_write (desc, buffer, size) >= 0)
    return;

  error (0, errno, "write error");
  cleanup ();
}
/* Return a pointer to N bytes of freshly allocated memory.
   If malloc returns a null pointer, print a diagnostic and call
   `cleanup'. */

static char *
xmalloc (n)
     unsigned n;
{
  char *mem = malloc (n);

  if (mem != 0)
    return mem;

  error (0, 0, "virtual memory exhausted");
  cleanup ();
  return mem;
}
/* Resize the memory area P to N bytes and return its new address.
   If realloc returns a null pointer, print a diagnostic and call
   `cleanup'. */

static char *
xrealloc (p, n)
     char *p;
     unsigned n;
{
  char *resized = realloc (p, n);

  if (resized != 0)
    return resized;

  error (0, 0, "virtual memory exhausted");
  cleanup ();
  return resized;
}