(libfetish_a_SOURCES): Add mmap-stack.h.
[coreutils.git] / src / tac.c
blobc7d9e258d3a10276a9529b60ed733fb6f7ca3062
1 /* tac - concatenate and print files in reverse
2 Copyright (C) 1988-1991, 1995-2002 Free Software Foundation, Inc.
4 This program is free software; you can redistribute it and/or modify
5 it under the terms of the GNU General Public License as published by
6 the Free Software Foundation; either version 2, or (at your option)
7 any later version.
9 This program is distributed in the hope that it will be useful,
10 but WITHOUT ANY WARRANTY; without even the implied warranty of
11 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 GNU General Public License for more details.
14 You should have received a copy of the GNU General Public License
15 along with this program; if not, write to the Free Software Foundation,
16 Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. */
18 /* Written by Jay Lepreau (lepreau@cs.utah.edu).
19 GNU enhancements by David MacKenzie (djm@gnu.ai.mit.edu). */
21 /* Copy each FILE, or the standard input if none are given or when a
22 FILE name of "-" is encountered, to the standard output with the
23 order of the records reversed. The records are separated by
24 instances of a string, or a newline if none is given. By default, the
25 separator string is attached to the end of the record that it
26 follows in the file.
28 Options:
29 -b, --before The separator is attached to the beginning
30 of the record that it precedes in the file.
31 -r, --regex The separator is a regular expression.
32 -s, --separator=separator Use SEPARATOR as the record separator.
34 To reverse a file byte by byte, use (in bash, ksh, or sh):
35 tac -r -s '.\|
36 ' file */
38 #include <config.h>
40 #include <stdio.h>
41 #include <getopt.h>
42 #include <sys/types.h>
43 #include "system.h"
44 #include "closeout.h"
46 #include <regex.h>
48 #include "error.h"
49 #include "safe-read.h"
51 /* The official name of this program (e.g., no `g' prefix). */
52 #define PROGRAM_NAME "tac"
54 #define AUTHORS N_ ("Jay Lepreau and David MacKenzie")
56 #if defined __MSDOS__ || defined _WIN32
57 /* Define this to non-zero on systems for which the regular mechanism
58 (of unlinking an open file and expecting to be able to write, seek
59 back to the beginning, then reread it) doesn't work. E.g., on Windows
60 and DOS systems. */
61 # define DONT_UNLINK_WHILE_OPEN 1
62 #endif
65 #ifndef DEFAULT_TMPDIR
66 # define DEFAULT_TMPDIR "/tmp"
67 #endif
69 /* The number of bytes per atomic read. */
70 #define INITIAL_READSIZE 8192
72 /* The number of bytes per atomic write. */
73 #define WRITESIZE 8192
75 /* The name this program was run with. */
76 char *program_name;
78 /* The string that separates the records of the file. */
79 static char *separator;
81 /* If nonzero, print `separator' along with the record preceding it
82 in the file; otherwise with the record following it. */
83 static int separator_ends_record;
85 /* 0 if `separator' is to be matched as a regular expression;
86 otherwise, the length of `separator', used as a sentinel to
87 stop the search. */
88 static size_t sentinel_length;
90 /* The length of a match with `separator'. If `sentinel_length' is 0,
91 `match_length' is computed every time a match succeeds;
92 otherwise, it is simply the length of `separator'. */
93 static int match_length;
95 /* The input buffer. */
96 static char *G_buffer;
98 /* The number of bytes to read at once into `G_buffer'. */
99 static size_t read_size;
101 /* The size of `G_buffer'. This is read_size * 2 + sentinel_length + 2.
102 The extra 2 bytes allow `past_end' to have a value beyond the
103 end of `G_buffer' and `match_start' to run off the front of `G_buffer'. */
104 static unsigned G_buffer_size;
106 /* The compiled regular expression representing `separator'. */
107 static struct re_pattern_buffer compiled_separator;
/* Long-option table passed to getopt_long in main.  The three named
   entries map --before, --regex and --separator onto the short option
   characters b, r and s; --separator is the only one taking an argument.
   The help and version rows come from the GETOPT_*_OPTION_DECL macros,
   and the all-zero row terminates the table. */
109 static struct option const longopts[] =
111 {"before", no_argument, NULL, 'b'},
112 {"regex", no_argument, NULL, 'r'},
113 {"separator", required_argument, NULL, 's'},
114 {GETOPT_HELP_OPTION_DECL},
115 {GETOPT_VERSION_OPTION_DECL},
116 {NULL, 0, NULL, 0}
/* Print a usage message and exit; this function does not return.
   When STATUS is nonzero, a one-line hint pointing at --help is written
   to stderr.  When STATUS is zero, the full help text (synopsis, option
   list, bug-report address) is written to stdout.
   The process exit code is EXIT_SUCCESS for STATUS 0 and EXIT_FAILURE
   for anything else. */
119 void
120 usage (int status)
122 if (status != 0)
123 fprintf (stderr, _("Try `%s --help' for more information.\n"),
124 program_name);
125 else
127 printf (_("\
128 Usage: %s [OPTION]... [FILE]...\n\
130 program_name);
131 fputs (_("\
132 Write each FILE to standard output, last line first.\n\
133 With no FILE, or when FILE is -, read standard input.\n\
135 "), stdout);
136 fputs (_("\
137 Mandatory arguments to long options are mandatory for short options too.\n\
138 "), stdout);
139 fputs (_("\
140 -b, --before attach the separator before instead of after\n\
141 -r, --regex interpret the separator as a regular expression\n\
142 -s, --separator=STRING use STRING as the separator instead of newline\n\
143 "), stdout);
144 fputs (HELP_OPTION_DESCRIPTION, stdout);
145 fputs (VERSION_OPTION_DESCRIPTION, stdout);
146 printf (_("\nReport bugs to <%s>.\n"), PACKAGE_BUGREPORT);
148 exit (status == 0 ? EXIT_SUCCESS : EXIT_FAILURE);
151 /* Print the characters from START to PAST_END - 1.
152 If START is NULL, just flush the buffer. */
154 static void
155 output (const char *start, const char *past_end)
157 static char buffer[WRITESIZE];
158 static size_t bytes_in_buffer = 0;
159 size_t bytes_to_add = past_end - start;
160 size_t bytes_available = WRITESIZE - bytes_in_buffer;
162 if (start == 0)
164 fwrite (buffer, 1, bytes_in_buffer, stdout);
165 bytes_in_buffer = 0;
166 return;
169 /* Write out as many full buffers as possible. */
170 while (bytes_to_add >= bytes_available)
172 memcpy (buffer + bytes_in_buffer, start, bytes_available);
173 bytes_to_add -= bytes_available;
174 start += bytes_available;
175 fwrite (buffer, 1, WRITESIZE, stdout);
176 bytes_in_buffer = 0;
177 bytes_available = WRITESIZE;
180 memcpy (buffer + bytes_in_buffer, start, bytes_to_add);
181 bytes_in_buffer += bytes_to_add;
184 /* Print in reverse the file open on descriptor FD for reading FILE.
185 Return 0 if ok, 1 if an error occurs. */
187 static int
188 tac_seekable (int input_fd, const char *file)
190 /* Pointer to the location in `G_buffer' where the search for
191 the next separator will begin. */
192 char *match_start;
194 /* Pointer to one past the rightmost character in `G_buffer' that
195 has not been printed yet. */
196 char *past_end;
198 /* Length of the record growing in `G_buffer'. */
199 size_t saved_record_size;
201 /* Offset in the file of the next read. */
202 off_t file_pos;
204 /* Nonzero if `output' has not been called yet for any file.
205 Only used when the separator is attached to the preceding record. */
206 int first_time = 1;
207 char first_char = *separator; /* Speed optimization, non-regexp. */
208 char *separator1 = separator + 1; /* Speed optimization, non-regexp. */
209 int match_length1 = match_length - 1; /* Speed optimization, non-regexp. */
210 struct re_registers regs;
212 /* Find the size of the input file. */
213 file_pos = lseek (input_fd, (off_t) 0, SEEK_END);
214 if (file_pos < 1)
215 return 0; /* It's an empty file. */
217 /* Arrange for the first read to lop off enough to leave the rest of the
218 file a multiple of `read_size'. Since `read_size' can change, this may
219 not always hold during the program run, but since it usually will, leave
220 it here for i/o efficiency (page/sector boundaries and all that).
221 Note: the efficiency gain has not been verified. */
222 saved_record_size = file_pos % read_size;
223 if (saved_record_size == 0)
224 saved_record_size = read_size;
225 file_pos -= saved_record_size;
226 /* `file_pos' now points to the start of the last (probably partial) block
227 in the input file. */
229 if (lseek (input_fd, file_pos, SEEK_SET) < 0)
230 error (0, errno, "%s: seek failed", file);
232 if (safe_read (input_fd, G_buffer, saved_record_size) != saved_record_size)
234 error (0, errno, "%s", file);
235 return 1;
238 match_start = past_end = G_buffer + saved_record_size;
239 /* For non-regexp search, move past impossible positions for a match. */
240 if (sentinel_length)
241 match_start -= match_length1;
243 for (;;)
245 /* Search backward from `match_start' - 1 to `G_buffer' for a match
246 with `separator'; for speed, use strncmp if `separator' contains no
247 metacharacters.
248 If the match succeeds, set `match_start' to point to the start of
249 the match and `match_length' to the length of the match.
250 Otherwise, make `match_start' < `G_buffer'. */
251 if (sentinel_length == 0)
253 int i = match_start - G_buffer;
254 int ret;
256 ret = re_search (&compiled_separator, G_buffer, i, i - 1, -i, &regs);
257 if (ret == -1)
258 match_start = G_buffer - 1;
259 else if (ret == -2)
261 error (EXIT_FAILURE, 0,
262 _("error in regular expression search"));
264 else
266 match_start = G_buffer + regs.start[0];
267 match_length = regs.end[0] - regs.start[0];
270 else
272 /* `match_length' is constant for non-regexp boundaries. */
273 while (*--match_start != first_char
274 || (match_length1 && strncmp (match_start + 1, separator1,
275 match_length1)))
276 /* Do nothing. */ ;
279 /* Check whether we backed off the front of `G_buffer' without finding
280 a match for `separator'. */
281 if (match_start < G_buffer)
283 if (file_pos == 0)
285 /* Hit the beginning of the file; print the remaining record. */
286 output (G_buffer, past_end);
287 return 0;
290 saved_record_size = past_end - G_buffer;
291 if (saved_record_size > read_size)
293 /* `G_buffer_size' is about twice `read_size', so since
294 we want to read in another `read_size' bytes before
295 the data already in `G_buffer', we need to increase
296 `G_buffer_size'. */
297 char *newbuffer;
298 int offset = sentinel_length ? sentinel_length : 1;
300 read_size *= 2;
301 G_buffer_size = read_size * 2 + sentinel_length + 2;
302 newbuffer = xrealloc (G_buffer - offset, G_buffer_size);
303 newbuffer += offset;
304 /* Adjust the pointers for the new buffer location. */
305 match_start += newbuffer - G_buffer;
306 past_end += newbuffer - G_buffer;
307 G_buffer = newbuffer;
310 /* Back up to the start of the next bufferfull of the file. */
311 if (file_pos >= read_size)
312 file_pos -= read_size;
313 else
315 read_size = file_pos;
316 file_pos = 0;
318 lseek (input_fd, file_pos, SEEK_SET);
320 /* Shift the pending record data right to make room for the new.
321 The source and destination regions probably overlap. */
322 memmove (G_buffer + read_size, G_buffer, saved_record_size);
323 past_end = G_buffer + read_size + saved_record_size;
324 /* For non-regexp searches, avoid unneccessary scanning. */
325 if (sentinel_length)
326 match_start = G_buffer + read_size;
327 else
328 match_start = past_end;
330 if (safe_read (input_fd, G_buffer, read_size) != read_size)
332 error (0, errno, "%s", file);
333 return 1;
336 else
338 /* Found a match of `separator'. */
339 if (separator_ends_record)
341 char *match_end = match_start + match_length;
343 /* If this match of `separator' isn't at the end of the
344 file, print the record. */
345 if (first_time == 0 || match_end != past_end)
346 output (match_end, past_end);
347 past_end = match_end;
348 first_time = 0;
350 else
352 output (match_start, past_end);
353 past_end = match_start;
356 /* For non-regex matching, we can back up. */
357 if (sentinel_length > 0)
358 match_start -= match_length - 1;
/* Print the file named FILE in reverse.
   The file is opened with fopen, switched to binary mode, and its
   descriptor handed to tac_seekable.
   Return 0 if ok, 1 if the file cannot be opened, if tac_seekable
   reports an error, or if the stream is in error or fails to close. */

static int
tac_file (const char *file)
{
  int errors;
  int stream_failed;
  FILE *in;

  in = fopen (file, "r");
  if (in == NULL)
    {
      error (0, errno, "%s", file);
      return 1;
    }
  SET_BINARY (fileno (in));
  errors = tac_seekable (fileno (in), file);

  /* Close the stream unconditionally.  Testing `ferror (in) || fclose (in)'
     would skip the fclose, and so leak the stream, whenever the error
     indicator is set. */
  stream_failed = ferror (in);
  if (fclose (in) == EOF)
    stream_failed = 1;
  if (stream_failed)
    {
      error (0, errno, "%s", file);
      return 1;
    }
  return errors;
}
#if DONT_UNLINK_WHILE_OPEN

/* Name and stream of the temporary file to dispose of at exit.
   Only the first file passed to record_tempfile is remembered. */
static const char *file_to_remove;
static FILE *fp_to_close;

/* atexit handler: close the remembered stream, then remove its file. */

static void
unlink_tempfile (void)
{
  fclose (fp_to_close);
  unlink (file_to_remove);
}

/* Remember temporary file FN, open on stream FP, and arrange for it to
   be closed and removed when the program exits.  Calls made after the
   first one are ignored. */

static void
record_tempfile (const char *fn, FILE *fp)
{
  if (file_to_remove)
    return;

  file_to_remove = fn;
  fp_to_close = fp;
  atexit (unlink_tempfile);
}

#endif
413 /* Make a copy of the standard input in `FIXME'. */
415 static void
416 save_stdin (FILE **g_tmp, char **g_tempfile)
418 static char *template = NULL;
419 static char *tempdir;
420 char *tempfile;
421 FILE *tmp;
422 int fd;
424 if (template == NULL)
426 tempdir = getenv ("TMPDIR");
427 if (tempdir == NULL)
428 tempdir = DEFAULT_TMPDIR;
429 template = xmalloc (strlen (tempdir) + 11);
431 sprintf (template, "%s/tacXXXXXX", tempdir);
432 tempfile = template;
433 fd = mkstemp (template);
434 if (fd == -1)
435 error (EXIT_FAILURE, errno, "%s", tempfile);
437 tmp = fdopen (fd, "w+");
438 if (tmp == NULL)
439 error (EXIT_FAILURE, errno, "%s", tempfile);
441 #if DONT_UNLINK_WHILE_OPEN
442 record_tempfile (tempfile, tmp);
443 #else
444 unlink (tempfile);
445 #endif
447 while (1)
449 size_t bytes_read = safe_read (STDIN_FILENO, G_buffer, read_size);
450 if (bytes_read == 0)
451 break;
452 if (bytes_read == SAFE_READ_ERROR)
453 error (EXIT_FAILURE, errno, _("stdin: read error"));
455 if (fwrite (G_buffer, 1, bytes_read, tmp) != bytes_read)
456 break;
459 if (ferror (tmp) || fflush (tmp) == EOF)
460 error (EXIT_FAILURE, errno, "%s", tempfile);
462 SET_BINARY (fileno (tmp));
463 *g_tmp = tmp;
464 *g_tempfile = tempfile;
/* Print the standard input in reverse.  When standard input is a
   regular file it is reversed directly; otherwise it is first copied
   to a temporary file by save_stdin and that copy is reversed.
   Return 0 if ok, 1 if an error occurs. */

static int
tac_stdin (void)
{
  int errors;
  struct stat stats;

  /* No tempfile is needed for "tac < file".
     Use fstat instead of checking for errno == ESPIPE because
     lseek doesn't work on some special files but doesn't return an
     error, either. */
  if (fstat (STDIN_FILENO, &stats))
    {
      error (0, errno, _("standard input"));
      return 1;
    }

  if (S_ISREG (stats.st_mode))
    {
      errors = tac_seekable (fileno (stdin), _("standard input"));
    }
  else
    {
      FILE *tmp_stream;
      char *tmp_file;
      save_stdin (&tmp_stream, &tmp_file);
      errors = tac_seekable (fileno (tmp_stream), tmp_file);

#if ! DONT_UNLINK_WHILE_OPEN
      /* The copy has been fully consumed, so release the stream and its
         descriptor instead of leaking them until exit.  When
         DONT_UNLINK_WHILE_OPEN is set the stream is left alone here,
         because the exit handler registered by save_stdin closes it. */
      if (fclose (tmp_stream) == EOF)
        {
          error (0, errno, "%s", tmp_file);
          errors = 1;
        }
#endif
    }

  return errors;
}
502 #if 0
/* Return a pointer to the last byte equal to C (converted to
   unsigned char) in the range BUF_START .. BUF_END - 1, or NULL if no
   such byte exists.
   BUF_END points one byte past the end of the buffer to be searched.
   An empty range (BUF_END == BUF_START) yields NULL. */

static void *
memrchr (const char *buf_start, const char *buf_end, int c)
{
  const unsigned char wanted = (unsigned char) c;
  const char *p = buf_end;

  /* Compare before decrementing so that P never moves below BUF_START. */
  while (buf_start < p)
    {
      --p;
      if (*(const unsigned char *) p == wanted)
        return (void *) p;
    }
  return NULL;
}
/* Write the N_BYTES bytes starting at BUF to OUT with the order of the
   newline-terminated lines reversed.  If the input does not end in a
   newline, the final partial line is written first and a newline is
   appended to it so that it does not run into the next line.
   Return 0 if ok, nonzero if OUT has its error indicator set
   afterwards. */

static int
tac_mem (const char *buf, size_t n_bytes, FILE *out)
{
  const char *buf_end;
  const char *nl;
  const char *bol;

  if (n_bytes == 0)
    return 0;

  buf_end = buf + n_bytes;
  nl = memrchr (buf, buf_end, '\n');
  bol = (nl == NULL ? buf : nl + 1);

  /* If the last line of the input file has no terminating newline,
     treat it as a special case. */
  if (bol < buf_end)
    {
      /* Print out the line from bol to end of input. */
      fwrite (bol, 1, buf_end - bol, out);

      /* Add a newline here.  Otherwise, the first and second lines
         of output would appear to have been joined. */
      fputc ('\n', out);
    }

  /* BOL - 1 is the newline that ends the previous line, so the search
     covers everything before it.  Only form BOL - 1 when BOL is past
     the start of the buffer. */
  while (buf < bol && (nl = memrchr (buf, bol - 1, '\n')) != NULL)
    {
      /* Output the line (which includes a trailing newline)
         from NL+1 to BOL-1. */
      fwrite (nl + 1, 1, bol - (nl + 1), out);

      bol = nl + 1;
    }

  /* If there's anything left, output the last line: BUF .. BOL-1.
     When the first byte of the input is a newline, there is nothing
     left to do here. */
  if (buf < bol)
    fwrite (buf, 1, bol - buf, out);

  /* FIXME: this is work in progress.... */
  return ferror (out);
}
562 /* FIXME: describe */
564 static int
565 tac_stdin_to_mem (void)
567 char *buf = NULL;
568 size_t bufsiz = 8 * BUFSIZ;
569 size_t delta = 8 * BUFSIZ;
570 size_t n_bytes = 0;
572 while (1)
574 size_t bytes_read;
575 if (buf == NULL)
576 buf = (char *) malloc (bufsiz);
577 else
578 buf = (char *) realloc (buf, bufsiz);
580 if (buf == NULL)
582 /* Free the buffer and fall back on the code that relies on a
583 temporary file. */
584 free (buf);
585 /* FIXME */
586 abort ();
588 bytes_read = safe_read (STDIN_FILENO, buf + n_bytes, bufsiz - n_bytes);
589 if (bytes_read == 0)
590 break;
591 if (bytes_read == SAFE_READ_ERROR)
592 error (EXIT_FAILURE, errno, _("stdin: read error"));
593 n_bytes += bytes_read;
595 bufsiz += delta;
598 tac_mem (buf, n_bytes, stdout);
600 return 0;
602 #endif
/* Program entry point.  Parse the options, prepare the separator
   (literal string or compiled regular expression), allocate the shared
   input buffer, then reverse standard input or each FILE operand in
   turn.  Exits with EXIT_SUCCESS if every input was processed without
   error and EXIT_FAILURE otherwise. */
605 main (int argc, char **argv)
607 const char *error_message; /* Return value from re_compile_pattern. */
608 int optc, errors;
609 int have_read_stdin = 0;
611 program_name = argv[0];
612 setlocale (LC_ALL, "");
613 bindtextdomain (PACKAGE, LOCALEDIR);
614 textdomain (PACKAGE);
616 atexit (close_stdout);
/* Defaults: newline separator, matched literally, attached to the end
   of the record that precedes it. */
618 errors = 0;
619 separator = "\n";
620 sentinel_length = 1;
621 separator_ends_record = 1;
623 while ((optc = getopt_long (argc, argv, "brs:", longopts, NULL)) != -1)
625 switch (optc)
627 case 0:
628 break;
629 case 'b':
630 separator_ends_record = 0;
631 break;
632 case 'r':
/* A zero sentinel_length is the flag for regular expression matching. */
633 sentinel_length = 0;
634 break;
635 case 's':
636 separator = optarg;
637 if (*separator == 0)
638 error (EXIT_FAILURE, 0, _("separator cannot be empty"));
639 break;
640 case_GETOPT_HELP_CHAR;
641 case_GETOPT_VERSION_CHAR (PROGRAM_NAME, AUTHORS);
642 default:
643 usage (EXIT_FAILURE);
/* Regex mode: compile the separator once, up front.  Literal mode:
   record the separator length as both match length and sentinel length. */
647 if (sentinel_length == 0)
649 compiled_separator.allocated = 100;
650 compiled_separator.buffer = (unsigned char *)
651 xmalloc (compiled_separator.allocated);
652 compiled_separator.fastmap = xmalloc (256);
653 compiled_separator.translate = 0;
654 error_message = re_compile_pattern (separator, strlen (separator),
655 &compiled_separator);
656 if (error_message)
657 error (EXIT_FAILURE, 0, "%s", error_message);
659 else
660 match_length = sentinel_length = strlen (separator);
662 read_size = INITIAL_READSIZE;
663 /* A precaution that will probably never be needed. */
664 while (sentinel_length * 2 >= read_size)
665 read_size *= 2;
666 G_buffer_size = read_size * 2 + sentinel_length + 2;
667 G_buffer = xmalloc (G_buffer_size);
/* In literal mode a copy of the separator is stored at the very start of
   the allocation and G_buffer is advanced past it, so the unbounded
   backward scan in tac_seekable always finds a match there and stops.
   In regex mode a single spare byte is left in front of G_buffer, which
   lets tac_seekable set match_start to G_buffer - 1 to mean no match. */
668 if (sentinel_length)
670 strcpy (G_buffer, separator);
671 G_buffer += sentinel_length;
673 else
675 ++G_buffer;
/* No operands: reverse standard input.  Otherwise handle each operand,
   with a lone - operand meaning standard input. */
678 if (optind == argc)
680 have_read_stdin = 1;
681 /* We need binary I/O, since `tac' relies
682 on `lseek' and byte counts. */
683 SET_BINARY2 (STDIN_FILENO, STDOUT_FILENO);
684 errors = tac_stdin ();
686 else
688 for (; optind < argc; ++optind)
690 if (STREQ (argv[optind], "-"))
692 have_read_stdin = 1;
693 SET_BINARY2 (STDIN_FILENO, STDOUT_FILENO);
694 errors |= tac_stdin ();
696 else
698 /* Binary output will leave the lines' ends (NL or
699 CR/LF) intact when the output is a disk file.
700 Writing a file with CR/LF pairs at end of lines in
701 text mode has no visible effect on console output,
702 since two CRs in a row are just like one CR. */
703 SET_BINARY (STDOUT_FILENO);
704 errors |= tac_file (argv[optind]);
709 /* Flush the output buffer. */
710 output ((char *) NULL, (char *) NULL);
712 if (have_read_stdin && close (STDIN_FILENO) < 0)
713 error (EXIT_FAILURE, errno, "-");
714 exit (errors == 0 ? EXIT_SUCCESS : EXIT_FAILURE);