.
[coreutils.git] / src / tac.c
blobf55fb6ffb2067f920c43d69e1930e5ee4f29737f
1 /* tac - concatenate and print files in reverse
2 Copyright (C) 88, 89, 90, 91, 95, 96, 1997, 1998 Free Software Foundation, Inc.
4 This program is free software; you can redistribute it and/or modify
5 it under the terms of the GNU General Public License as published by
6 the Free Software Foundation; either version 2, or (at your option)
7 any later version.
9 This program is distributed in the hope that it will be useful,
10 but WITHOUT ANY WARRANTY; without even the implied warranty of
11 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 GNU General Public License for more details.
14 You should have received a copy of the GNU General Public License
15 along with this program; if not, write to the Free Software Foundation,
16 Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. */
18 /* Written by Jay Lepreau (lepreau@cs.utah.edu).
19 GNU enhancements by David MacKenzie (djm@gnu.ai.mit.edu). */
21 /* Copy each FILE, or the standard input if none are given or when a
22 FILE name of "-" is encountered, to the standard output with the
23 order of the records reversed. The records are separated by
24 instances of a string, or a newline if none is given. By default, the
25 separator string is attached to the end of the record that it
26 follows in the file.
28 Options:
29 -b, --before The separator is attached to the beginning
30 of the record that it precedes in the file.
31 -r, --regex The separator is a regular expression.
32 -s, --separator=separator Use SEPARATOR as the record separator.
34 To reverse a file byte by byte, use (in bash, ksh, or sh):
35 tac -r -s '.\|
36 ' file */
38 #include <config.h>
40 #include <stdio.h>
41 #include <getopt.h>
42 #include <sys/types.h>
43 #include "system.h"
45 #if WITH_REGEX
46 # include <regex.h>
47 #else
48 # include <rx.h>
49 #endif
51 #include "error.h"
53 #ifndef DEFAULT_TMPDIR
54 # define DEFAULT_TMPDIR "/tmp"
55 #endif
57 /* The number of bytes per atomic read. */
58 #define INITIAL_READSIZE 8192
60 /* The number of bytes per atomic write. */
61 #define WRITESIZE 8192
63 char *mktemp ();
64 int safe_read ();
66 /* The name this program was run with. */
67 char *program_name;
69 /* The string that separates the records of the file. */
70 static char *separator;
72 /* If nonzero, print `separator' along with the record preceding it
73 in the file; otherwise with the record following it. */
74 static int separator_ends_record;
76 /* 0 if `separator' is to be matched as a regular expression;
77 otherwise, the length of `separator', used as a sentinel to
78 stop the search. */
79 static int sentinel_length;
81 /* The length of a match with `separator'. If `sentinel_length' is 0,
82 `match_length' is computed every time a match succeeds;
83 otherwise, it is simply the length of `separator'. */
84 static int match_length;
86 /* The input buffer. */
87 static char *G_buffer;
89 /* The number of bytes to read at once into `buffer'. */
90 static unsigned read_size;
92 /* The size of `buffer'. This is read_size * 2 + sentinel_length + 2.
93 The extra 2 bytes allow `past_end' to have a value beyond the
94 end of `G_buffer' and `match_start' to run off the front of `G_buffer'. */
95 static unsigned G_buffer_size;
97 /* The compiled regular expression representing `separator'. */
98 static struct re_pattern_buffer compiled_separator;
100 /* If nonzero, display usage information and exit. */
101 static int show_help;
103 /* If nonzero, print the version on standard output then exit. */
104 static int show_version;
106 static struct option const longopts[] =
108 {"before", no_argument, &separator_ends_record, 0},
109 {"regex", no_argument, &sentinel_length, 0},
110 {"separator", required_argument, NULL, 's'},
111 {"help", no_argument, &show_help, 1},
112 {"version", no_argument, &show_version, 1},
113 {NULL, 0, NULL, 0}
116 /* Read LEN bytes at PTR from descriptor DESC, retrying if interrupted.
117 Return the actual number of bytes read, zero for EOF, or negative
118 for an error. */
121 safe_read (int desc, char *ptr, int len)
123 int n_chars;
125 if (len <= 0)
126 return len;
128 #ifdef EINTR
131 n_chars = read (desc, ptr, len);
133 while (n_chars < 0 && errno == EINTR);
134 #else
135 n_chars = read (desc, ptr, len);
136 #endif
138 return n_chars;
141 static void
142 usage (int status)
144 if (status != 0)
145 fprintf (stderr, _("Try `%s --help' for more information.\n"),
146 program_name);
147 else
149 printf (_("\
150 Usage: %s [OPTION]... [FILE]...\n\
152 program_name);
153 printf (_("\
154 Write each FILE to standard output, last line first.\n\
155 With no FILE, or when FILE is -, read standard input.\n\
157 -b, --before attach the separator before instead of after\n\
158 -r, --regex interpret the separator as a regular expression\n\
159 -s, --separator=STRING use STRING as the separator instead of newline\n\
160 --help display this help and exit\n\
161 --version output version information and exit\n\
162 "));
163 puts (_("\nReport bugs to <textutils-bugs@gnu.org>."));
165 exit (status == 0 ? EXIT_SUCCESS : EXIT_FAILURE);
168 /* Print the characters from START to PAST_END - 1.
169 If START is NULL, just flush the buffer. */
171 static void
172 output (const char *start, const char *past_end)
174 static char buffer[WRITESIZE];
175 static int bytes_in_buffer = 0;
176 int bytes_to_add = past_end - start;
177 int bytes_available = WRITESIZE - bytes_in_buffer;
179 if (start == 0)
181 fwrite (buffer, 1, bytes_in_buffer, stdout);
182 bytes_in_buffer = 0;
183 return;
186 /* Write out as many full buffers as possible. */
187 while (bytes_to_add >= bytes_available)
189 memcpy (buffer + bytes_in_buffer, start, bytes_available);
190 bytes_to_add -= bytes_available;
191 start += bytes_available;
192 fwrite (buffer, 1, WRITESIZE, stdout);
193 bytes_in_buffer = 0;
194 bytes_available = WRITESIZE;
197 memcpy (buffer + bytes_in_buffer, start, bytes_to_add);
198 bytes_in_buffer += bytes_to_add;
201 /* Print in reverse the file open on descriptor FD for reading FILE.
202 Return 0 if ok, 1 if an error occurs. */
204 static int
205 tac_stream (FILE *in, const char *file)
207 /* Pointer to the location in `G_buffer' where the search for
208 the next separator will begin. */
209 char *match_start;
211 /* Pointer to one past the rightmost character in `G_buffer' that
212 has not been printed yet. */
213 char *past_end;
215 /* Length of the record growing in `G_buffer'. */
216 unsigned saved_record_size;
218 /* Offset in the file of the next read. */
219 off_t file_pos;
221 /* Nonzero if `output' has not been called yet for any file.
222 Only used when the separator is attached to the preceding record. */
223 int first_time = 1;
224 char first_char = *separator; /* Speed optimization, non-regexp. */
225 char *separator1 = separator + 1; /* Speed optimization, non-regexp. */
226 int match_length1 = match_length - 1; /* Speed optimization, non-regexp. */
227 struct re_registers regs;
229 /* Find the size of the input file. */
230 file_pos = lseek (fileno (in), (off_t) 0, SEEK_END);
231 if (file_pos < 1)
232 return 0; /* It's an empty file. */
234 /* Arrange for the first read to lop off enough to leave the rest of the
235 file a multiple of `read_size'. Since `read_size' can change, this may
236 not always hold during the program run, but since it usually will, leave
237 it here for i/o efficiency (page/sector boundaries and all that).
238 Note: the efficiency gain has not been verified. */
239 saved_record_size = file_pos % read_size;
240 if (saved_record_size == 0)
241 saved_record_size = read_size;
242 file_pos -= saved_record_size;
243 /* `file_pos' now points to the start of the last (probably partial) block
244 in the input file. */
246 lseek (fileno (in), file_pos, SEEK_SET);
247 if (safe_read (fileno (in), G_buffer, saved_record_size) != saved_record_size)
249 error (0, errno, "%s", file);
250 return 1;
253 match_start = past_end = G_buffer + saved_record_size;
254 /* For non-regexp search, move past impossible positions for a match. */
255 if (sentinel_length)
256 match_start -= match_length1;
258 for (;;)
260 /* Search backward from `match_start' - 1 to `G_buffer' for a match
261 with `separator'; for speed, use strncmp if `separator' contains no
262 metacharacters.
263 If the match succeeds, set `match_start' to point to the start of
264 the match and `match_length' to the length of the match.
265 Otherwise, make `match_start' < `G_buffer'. */
266 if (sentinel_length == 0)
268 int i = match_start - G_buffer;
269 int ret;
271 ret = re_search (&compiled_separator, G_buffer, i, i - 1, -i, &regs);
272 if (ret == -1)
273 match_start = G_buffer - 1;
274 else if (ret == -2)
276 error (EXIT_FAILURE, 0,
277 _("error in regular expression search"));
279 else
281 match_start = G_buffer + regs.start[0];
282 match_length = regs.end[0] - regs.start[0];
285 else
287 /* `match_length' is constant for non-regexp boundaries. */
288 while (*--match_start != first_char
289 || (match_length1 && strncmp (match_start + 1, separator1,
290 match_length1)))
291 /* Do nothing. */ ;
294 /* Check whether we backed off the front of `G_buffer' without finding
295 a match for `separator'. */
296 if (match_start < G_buffer)
298 if (file_pos == 0)
300 /* Hit the beginning of the file; print the remaining record. */
301 output (G_buffer, past_end);
302 return 0;
305 saved_record_size = past_end - G_buffer;
306 if (saved_record_size > read_size)
308 /* `G_buffer_size' is about twice `read_size', so since
309 we want to read in another `read_size' bytes before
310 the data already in `G_buffer', we need to increase
311 `G_buffer_size'. */
312 char *newbuffer;
313 int offset = sentinel_length ? sentinel_length : 1;
315 read_size *= 2;
316 G_buffer_size = read_size * 2 + sentinel_length + 2;
317 newbuffer = xrealloc (G_buffer - offset, G_buffer_size);
318 newbuffer += offset;
319 /* Adjust the pointers for the new buffer location. */
320 match_start += newbuffer - G_buffer;
321 past_end += newbuffer - G_buffer;
322 G_buffer = newbuffer;
325 /* Back up to the start of the next bufferfull of the file. */
326 if (file_pos >= read_size)
327 file_pos -= read_size;
328 else
330 read_size = file_pos;
331 file_pos = 0;
333 lseek (fileno (in), file_pos, SEEK_SET);
335 /* Shift the pending record data right to make room for the new.
336 The source and destination regions probably overlap. */
337 memmove (G_buffer + read_size, G_buffer, saved_record_size);
338 past_end = G_buffer + read_size + saved_record_size;
339 /* For non-regexp searches, avoid unneccessary scanning. */
340 if (sentinel_length)
341 match_start = G_buffer + read_size;
342 else
343 match_start = past_end;
345 if (safe_read (fileno (in), G_buffer, read_size) != read_size)
347 error (0, errno, "%s", file);
348 return 1;
351 else
353 /* Found a match of `separator'. */
354 if (separator_ends_record)
356 char *match_end = match_start + match_length;
358 /* If this match of `separator' isn't at the end of the
359 file, print the record. */
360 if (first_time == 0 || match_end != past_end)
361 output (match_end, past_end);
362 past_end = match_end;
363 first_time = 0;
365 else
367 output (match_start, past_end);
368 past_end = match_start;
370 match_start -= match_length - 1;
375 /* Print FILE in reverse.
376 Return 0 if ok, 1 if an error occurs. */
378 static int
379 tac_file (const char *file)
381 int errors;
382 FILE *in;
384 in = fopen (file, "r");
385 if (in == NULL)
387 error (0, errno, "%s", file);
388 return 1;
390 errors = tac_stream (in, file);
391 if (ferror (in) || fclose (in) == EOF)
393 error (0, errno, "%s", file);
394 return 1;
396 return errors;
399 /* Make a copy of the standard input in `FIXME'. */
401 static void
402 save_stdin (FILE **g_tmp, char **g_tempfile)
404 static char *template = NULL;
405 static char *tempdir;
406 static char *tempfile;
407 FILE *tmp;
408 int bytes_read;
409 int fd;
411 if (template == NULL)
413 tempdir = getenv ("TMPDIR");
414 if (tempdir == NULL)
415 tempdir = DEFAULT_TMPDIR;
416 template = xmalloc (strlen (tempdir) + 11);
418 sprintf (template, "%s/tacXXXXXX", tempdir);
419 tempfile = mktemp (template);
421 fd = creat (tempfile, 0600);
422 if (fd == -1 || (tmp = fdopen (fd, "rw")) == NULL)
423 error (EXIT_FAILURE, errno, "%s", tempfile);
424 tmp = fdopen (fd, "rw");
425 if (tmp == NULL)
426 error (EXIT_FAILURE, errno, "%s", tempfile);
427 unlink (tempfile);
429 while ((bytes_read = safe_read (0, G_buffer, read_size)) > 0)
430 fwrite (G_buffer, 1, bytes_read, tmp);
432 if (ferror (tmp) || fflush (tmp) == EOF)
433 error (EXIT_FAILURE, errno, "%s", tempfile);
435 if (fseek (tmp, (long int) 0, SEEK_SET))
436 error (EXIT_FAILURE, errno, "%s", tempfile);
438 if (bytes_read == -1)
439 error (EXIT_FAILURE, errno, _("read error"));
441 *g_tmp = tmp;
442 *g_tempfile = tempfile;
445 /* Print the standard input in reverse, saving it to temporary
446 file first if it is a pipe.
447 Return 0 if ok, 1 if an error occurs. */
449 static int
450 tac_stdin (void)
452 int errors;
453 struct stat stats;
455 /* No tempfile is needed for "tac < file".
456 Use fstat instead of checking for errno == ESPIPE because
457 lseek doesn't work on some special files but doesn't return an
458 error, either. */
459 if (fstat (0, &stats))
461 error (0, errno, _("standard input"));
462 return 1;
465 if (S_ISREG (stats.st_mode))
467 errors = tac_stream (stdin, _("standard input"));
469 else
471 FILE *tmp_stream;
472 char *tmp_file;
473 save_stdin (&tmp_stream, &tmp_file);
474 errors = tac_stream (tmp_stream, tmp_file);
477 return errors;
480 /* BUF_END_PLUS_ONE points one byte past the end of the buffer
481 to be searched. */
483 static void *
484 memrchr (const char *buf_start, const char *buf_end_plus_one, int c)
486 const char *p = buf_end_plus_one;
487 while (buf_start <= --p)
489 if (*(const unsigned char *) p == c)
490 return (void *) p;
492 return NULL;
495 static int
496 tac_mem (const char *buf, size_t n_bytes, FILE *out)
498 if (n_bytes == 0)
499 return 0;
502 const char *nl = memrchr (buf, buf + n_bytes, '\n');
503 const char *bol = (nl == NULL ? buf : nl + 1);
505 /* If the last line of the input file has no terminating newline,
506 treat it as a special case. */
507 if (bol < buf + n_bytes)
509 /* Print out the line from bol to end of input. */
510 fwrite (bol, 1, (buf + n_bytes) - bol, out);
512 /* Add a newline here. Otherwise, the first and second lines
513 of output would appear to have been joined. */
514 fputc ('\n', out);
517 while ((nl = memrchr (buf, bol - 1, '\n')) != NULL)
519 /* Output the line (which includes a trailing newline)
520 from NL+1 to BOL-1. */
521 fwrite (nl + 1, 1, bol - (nl + 1), out);
523 bol = nl + 1;
526 /* If there's anything left, output the last line: BUF .. BOL-1.
527 When the first byte of the input is a newline, there is nothing
528 left to do here. */
529 if (buf < bol)
530 fwrite (buf, 1, bol - buf, out);
534 static int
535 tac_stdin_to_mem (void)
537 char *buf = NULL;
538 size_t bufsiz = 8 * BUFSIZ;
539 size_t delta = 8 * BUFSIZ;
540 size_t n_bytes = 0;
542 while (1)
544 int bytes_read;
545 if (buf == NULL)
546 buf = (char *) malloc (bufsiz);
547 else
548 buf = (char *) realloc (buf, bufsiz);
550 if (buf == NULL)
552 /* Free the buffer and fall back on the code that relies on a
553 temporary file. */
554 free (buf);
555 /* FIXME */
556 abort ();
558 bytes_read = safe_read (STDIN_FILENO, buf + n_bytes, bufsiz - n_bytes);
559 if (bytes_read == 0)
560 break;
561 n_bytes += bytes_read;
562 if (bytes_read < 0)
563 error (1, errno, _("read error"));
565 bufsiz += delta;
568 tac_mem (buf, n_bytes, stdout);
570 return 0;
574 main (int argc, char **argv)
576 const char *error_message; /* Return value from re_compile_pattern. */
577 int optc, errors;
578 int have_read_stdin = 0;
580 program_name = argv[0];
581 setlocale (LC_ALL, "");
582 bindtextdomain (PACKAGE, LOCALEDIR);
583 textdomain (PACKAGE);
585 errors = 0;
586 separator = "\n";
587 sentinel_length = 1;
588 separator_ends_record = 1;
590 while ((optc = getopt_long (argc, argv, "brs:", longopts, NULL)) != -1)
592 switch (optc)
594 case 0:
595 break;
596 case 'b':
597 separator_ends_record = 0;
598 break;
599 case 'r':
600 sentinel_length = 0;
601 break;
602 case 's':
603 separator = optarg;
604 if (*separator == 0)
605 error (EXIT_FAILURE, 0, _("separator cannot be empty"));
606 break;
607 default:
608 usage (1);
612 if (show_version)
614 printf ("tac (%s) %s\n", GNU_PACKAGE, VERSION);
615 exit (EXIT_SUCCESS);
618 if (show_help)
619 usage (0);
621 if (sentinel_length == 0)
623 compiled_separator.allocated = 100;
624 compiled_separator.buffer = (unsigned char *)
625 xmalloc (compiled_separator.allocated);
626 compiled_separator.fastmap = xmalloc (256);
627 compiled_separator.translate = 0;
628 error_message = re_compile_pattern (separator, strlen (separator),
629 &compiled_separator);
630 if (error_message)
631 error (EXIT_FAILURE, 0, "%s", error_message);
633 else
634 match_length = sentinel_length = strlen (separator);
636 read_size = INITIAL_READSIZE;
637 /* A precaution that will probably never be needed. */
638 while (sentinel_length * 2 >= read_size)
639 read_size *= 2;
640 G_buffer_size = read_size * 2 + sentinel_length + 2;
641 G_buffer = xmalloc (G_buffer_size);
642 if (sentinel_length)
644 strcpy (G_buffer, separator);
645 G_buffer += sentinel_length;
647 else
648 ++G_buffer;
650 if (optind == argc)
652 have_read_stdin = 1;
653 errors = tac_stdin_to_mem ();
655 else
656 for (; optind < argc; ++optind)
658 if (strcmp (argv[optind], "-") == 0)
660 have_read_stdin = 1;
661 errors |= tac_stdin_to_mem ();
663 else
664 errors |= tac_file (argv[optind]);
667 /* Flush the output buffer. */
668 output ((char *) NULL, (char *) NULL);
670 if (have_read_stdin && close (0) < 0)
671 error (EXIT_FAILURE, errno, "-");
672 if (ferror (stdout) || fclose (stdout) == EOF)
673 error (EXIT_FAILURE, errno, _("write error"));
674 exit (errors == 0 ? EXIT_SUCCESS : EXIT_FAILURE);