1 /* tac - concatenate and print files in reverse
2 Copyright (C) 1988-1991, 1995-2006 Free Software Foundation, Inc.
4 This program is free software; you can redistribute it and/or modify
5 it under the terms of the GNU General Public License as published by
6 the Free Software Foundation; either version 2, or (at your option)
9 This program is distributed in the hope that it will be useful,
10 but WITHOUT ANY WARRANTY; without even the implied warranty of
11 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 GNU General Public License for more details.
14 You should have received a copy of the GNU General Public License
15 along with this program; if not, write to the Free Software Foundation,
16 Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. */
18 /* Written by Jay Lepreau (lepreau@cs.utah.edu).
19 GNU enhancements by David MacKenzie (djm@gnu.ai.mit.edu). */
21 /* Copy each FILE, or the standard input if none are given or when a
22 FILE name of "-" is encountered, to the standard output with the
23 order of the records reversed. The records are separated by
24 instances of a string, or a newline if none is given. By default, the
25 separator string is attached to the end of the record that it
29 -b, --before The separator is attached to the beginning
30 of the record that it precedes in the file.
31 -r, --regex The separator is a regular expression.
32 -s, --separator=separator Use SEPARATOR as the record separator.
34 To reverse a file byte by byte, use (in bash, ksh, or sh):
42 #include <sys/types.h>
50 #include "safe-read.h"
53 /* The official name of this program (e.g., no `g' prefix). */
54 #define PROGRAM_NAME "tac"
56 #define AUTHORS "Jay Lepreau", "David MacKenzie"
58 #if defined __MSDOS__ || defined _WIN32
59 /* Define this to non-zero on systems for which the regular mechanism
60 (of unlinking an open file and expecting to be able to write, seek
61 back to the beginning, then reread it) doesn't work. E.g., on Windows
and DOS systems. */
63 # define DONT_UNLINK_WHILE_OPEN 1
67 #ifndef DEFAULT_TMPDIR
68 # define DEFAULT_TMPDIR "/tmp"
71 /* The number of bytes per atomic read. */
72 #define INITIAL_READSIZE 8192
74 /* The number of bytes per atomic write. */
75 #define WRITESIZE 8192
77 /* The name this program was run with. */
80 /* The string that separates the records of the file.
   main either compiles it as a regular expression or takes its
   strlen as the sentinel length. */
81 static char const *separator
;
83 /* True if we have ever read standard input.
   Set in tac_file; main tests it before closing STDIN_FILENO. */
84 static bool have_read_stdin
= false;
86 /* If true, print `separator' along with the record preceding it
87 in the file; otherwise with the record following it.
   main initializes this to true before option parsing and clears it
   inside the option loop (the case label is not visible in this view). */
88 static bool separator_ends_record
;
90 /* 0 if `separator' is to be matched as a regular expression;
91 otherwise, the length of `separator', used as a sentinel to
stop the backward search. */
93 static size_t sentinel_length
;
95 /* The length of a match with `separator'. If `sentinel_length' is 0,
96 `match_length' is computed every time a match succeeds;
97 otherwise, it is simply the length of `separator'. */
98 static size_t match_length
;
100 /* The input buffer.
   main copies `separator' to the start of the allocation and advances
   this pointer by `sentinel_length', so it may point into the
   allocation rather than at its base (tac_seekable passes
   G_buffer - offset to xrealloc for that reason). */
101 static char *G_buffer
;
103 /* The number of bytes to read at once into `G_buffer'. */
104 static size_t read_size
;
106 /* The size of `G_buffer'. This is read_size * 2 + sentinel_length + 2.
107 The extra 2 bytes allow `past_end' to have a value beyond the
108 end of `G_buffer' and `match_start' to run off the front of `G_buffer'.
   tac_seekable recomputes the size with this formula when it grows the
   buffer; main's initial allocation is 2 * (read_size + sentinel_length + 1). */
109 static size_t G_buffer_size
;
111 /* The compiled regular expression representing `separator'. */
112 static struct re_pattern_buffer compiled_separator
;
/* Storage that main installs as compiled_separator.fastmap. */
113 static char compiled_separator_fastmap
[UCHAR_MAX
+ 1];
/* Long-option table handed to getopt_long in main.  The visible entries
   map --before, --regex and --separator to the short options 'b', 'r'
   and 's'; only --separator takes an argument.  The help and version
   entries come from the GETOPT_*_OPTION_DECL macros. */
115 static struct option
const longopts
[] =
117 {"before", no_argument
, NULL
, 'b'},
118 {"regex", no_argument
, NULL
, 'r'},
119 {"separator", required_argument
, NULL
, 's'},
120 {GETOPT_HELP_OPTION_DECL
},
121 {GETOPT_VERSION_OPTION_DECL
},
/* Body of the usage routine (main calls usage (EXIT_FAILURE) from its
   option loop).  A STATUS other than EXIT_SUCCESS produces the
   "Try --help" hint on stderr; the rest of the visible text is the help
   message, the standard --help and --version descriptions, and the
   bug-report address, written to stdout.
   NOTE(review): the function header, braces and several lines of this
   definition are not visible in this view. */
128 if (status
!= EXIT_SUCCESS
)
129 fprintf (stderr
, _("Try `%s --help' for more information.\n"),
134 Usage: %s [OPTION]... [FILE]...\n\
138 Write each FILE to standard output, last line first.\n\
139 With no FILE, or when FILE is -, read standard input.\n\
143 Mandatory arguments to long options are mandatory for short options too.\n\
146 -b, --before attach the separator before instead of after\n\
147 -r, --regex interpret the separator as a regular expression\n\
148 -s, --separator=STRING use STRING as the separator instead of newline\n\
150 fputs (HELP_OPTION_DESCRIPTION
, stdout
);
151 fputs (VERSION_OPTION_DESCRIPTION
, stdout
);
152 printf (_("\nReport bugs to <%s>.\n"), PACKAGE_BUGREPORT
);
157 /* Print the characters from START to PAST_END - 1.
158 If START is NULL, just flush the buffer.
   Bytes are staged in a static WRITESIZE-byte array that persists
   across calls and are passed to fwrite on stdout one full buffer at a
   time; a tail shorter than a full buffer stays staged for a later call. */
161 output (const char *start
, const char *past_end
)
163 static char buffer
[WRITESIZE
];
/* Number of staged bytes currently held in `buffer'. */
164 static size_t bytes_in_buffer
= 0;
165 size_t bytes_to_add
= past_end
- start
;
/* Room left in `buffer' before it must be written out. */
166 size_t bytes_available
= WRITESIZE
- bytes_in_buffer
;
/* Write the bytes currently staged (the flush described above; the
   test that guards this call is not visible in this view). */
170 fwrite (buffer
, 1, bytes_in_buffer
, stdout
);
175 /* Write out as many full buffers as possible. */
176 while (bytes_to_add
>= bytes_available
)
/* Top the buffer up to exactly WRITESIZE bytes, then emit it. */
178 memcpy (buffer
+ bytes_in_buffer
, start
, bytes_available
);
179 bytes_to_add
-= bytes_available
;
180 start
+= bytes_available
;
181 fwrite (buffer
, 1, WRITESIZE
, stdout
);
/* After a write the whole buffer is free again. */
183 bytes_available
= WRITESIZE
;
/* Stage the remaining bytes, which do not fill a buffer. */
186 memcpy (buffer
+ bytes_in_buffer
, start
, bytes_to_add
);
187 bytes_in_buffer
+= bytes_to_add
;
190 /* Print in reverse the file open on descriptor INPUT_FD for reading FILE.
191 Return true if successful.
   The file is read from the end towards the beginning: the first read
   takes the trailing file_pos % read_size bytes (or a whole read_size
   block), later reads step back read_size bytes at a time.  Each
   bufferful is searched backwards for `separator' and every record
   found is handed to `output'.  In the visible code FILE is used only
   in diagnostics.
   NOTE(review): braces, some declarations and several statements of
   this definition are not visible in this view.  The visible text is
   kept as it is, apart from the repaired `&regs' argument of re_search
   and comment repairs. */
194 tac_seekable (int input_fd
, const char *file
)
196 /* Pointer to the location in `G_buffer' where the search for
197 the next separator will begin. */
200 /* Pointer to one past the rightmost character in `G_buffer' that
201 has not been printed yet. */
204 /* Length of the record growing in `G_buffer'. */
205 size_t saved_record_size
;
207 /* Offset in the file of the next read. */
210 /* True if `output' has not been called yet for any file.
211 Only used when the separator is attached to the preceding record. */
212 bool first_time
= true;
213 char first_char
= *separator
; /* Speed optimization, non-regexp. */
214 char const *separator1
= separator
+ 1; /* Speed optimization, non-regexp. */
215 size_t match_length1
= match_length
- 1; /* Speed optimization, non-regexp. */
216 struct re_registers regs
;
218 /* Find the size of the input file. */
219 file_pos
= lseek (input_fd
, (off_t
) 0, SEEK_END
);
221 return true; /* It's an empty file. */
223 /* Arrange for the first read to lop off enough to leave the rest of the
224 file a multiple of `read_size'. Since `read_size' can change, this may
225 not always hold during the program run, but since it usually will, leave
226 it here for i/o efficiency (page/sector boundaries and all that).
227 Note: the efficiency gain has not been verified. */
228 saved_record_size
= file_pos
% read_size
;
229 if (saved_record_size
== 0)
230 saved_record_size
= read_size
;
231 file_pos
-= saved_record_size
;
232 /* `file_pos' now points to the start of the last (probably partial) block
233 in the input file. */
235 if (lseek (input_fd
, file_pos
, SEEK_SET
) < 0)
236 error (0, errno
, _("%s: seek failed"), quotearg_colon (file
));
/* The first read must deliver exactly the trailing block. */
238 if (safe_read (input_fd
, G_buffer
, saved_record_size
) != saved_record_size
)
240 error (0, errno
, _("%s: read error"), quotearg_colon (file
));
244 match_start
= past_end
= G_buffer
+ saved_record_size
;
245 /* For non-regexp search, move past impossible positions for a match. */
247 match_start
-= match_length1
;
251 /* Search backward from `match_start' - 1 to `G_buffer' for a match
252 with `separator'; for speed, use strncmp if `separator' contains no
   regular-expression syntax (i.e. `sentinel_length' is nonzero).
254 If the match succeeds, set `match_start' to point to the start of
255 the match and `match_length' to the length of the match.
256 Otherwise, make `match_start' < `G_buffer'. */
257 if (sentinel_length
== 0)
259 size_t i
= match_start
- G_buffer
;
261 regoff_t range
= 1 - ri
;
265 error (EXIT_FAILURE
, 0, _("record too large"));
/* Regex search over the first I bytes of `G_buffer', starting at
   offset I - 1 and moving by RANGE (1 - ri; `ri' is declared on a
   line not visible in this view).  Match offsets come back in `regs'. */
268 || ((ret
= re_search (&compiled_separator
, G_buffer
,
269 i
, i
- 1, range
, &regs
))
271 match_start
= G_buffer
- 1;
274 error (EXIT_FAILURE
, 0,
275 _("error in regular expression search"));
279 match_start
= G_buffer
+ regs
.start
[0];
280 match_length
= regs
.end
[0] - regs
.start
[0];
285 /* `match_length' is constant for non-regexp boundaries. */
286 while (*--match_start
!= first_char
287 || (match_length1
&& strncmp (match_start
+ 1, separator1
,
292 /* Check whether we backed off the front of `G_buffer' without finding
293 a match for `separator'. */
294 if (match_start
< G_buffer
)
298 /* Hit the beginning of the file; print the remaining record. */
299 output (G_buffer
, past_end
);
303 saved_record_size
= past_end
- G_buffer
;
304 if (saved_record_size
> read_size
)
306 /* `G_buffer_size' is about twice `read_size', so since
307 we want to read in another `read_size' bytes before
308 the data already in `G_buffer', we need to increase
   `G_buffer_size'. */
311 size_t offset
= sentinel_length
? sentinel_length
: 1;
312 ptrdiff_t match_start_offset
= match_start
- G_buffer
;
313 ptrdiff_t past_end_offset
= past_end
- G_buffer
;
314 size_t old_G_buffer_size
= G_buffer_size
;
317 G_buffer_size
= read_size
* 2 + sentinel_length
+ 2;
/* A new size smaller than the old one means the arithmetic wrapped. */
318 if (G_buffer_size
< old_G_buffer_size
)
/* `G_buffer' points `offset' bytes into the allocation, so pass
   xrealloc the pointer `offset' bytes earlier. */
320 newbuffer
= xrealloc (G_buffer
- offset
, G_buffer_size
);
322 /* Adjust the pointers for the new buffer location. */
323 match_start
= newbuffer
+ match_start_offset
;
324 past_end
= newbuffer
+ past_end_offset
;
325 G_buffer
= newbuffer
;
328 /* Back up to the start of the next bufferfull of the file. */
329 if (file_pos
>= read_size
)
330 file_pos
-= read_size
;
/* Fewer than `read_size' bytes remain in front of `file_pos', so the
   next read is shortened to that many bytes (the surrounding
   statements are not visible in this view). */
333 read_size
= file_pos
;
336 if (lseek (input_fd
, file_pos
, SEEK_SET
) < 0)
337 error (0, errno
, _("%s: seek failed"), quotearg_colon (file
));
339 /* Shift the pending record data right to make room for the new.
340 The source and destination regions probably overlap. */
341 memmove (G_buffer
+ read_size
, G_buffer
, saved_record_size
);
342 past_end
= G_buffer
+ read_size
+ saved_record_size
;
343 /* For non-regexp searches, avoid unnecessary scanning. */
345 match_start
= G_buffer
+ read_size
;
347 match_start
= past_end
;
349 if (safe_read (input_fd
, G_buffer
, read_size
) != read_size
)
351 error (0, errno
, _("%s: read error"), quotearg_colon (file
));
357 /* Found a match of `separator'. */
358 if (separator_ends_record
)
360 char *match_end
= match_start
+ match_length
;
362 /* If this match of `separator' isn't at the end of the
363 file, print the record. */
364 if (!first_time
|| match_end
!= past_end
)
365 output (match_end
, past_end
);
366 past_end
= match_end
;
/* Separator belongs to the record that follows it: print from the
   start of the match. */
371 output (match_start
, past_end
);
372 past_end
= match_start
;
375 /* For non-regex matching, we can back up. */
376 if (sentinel_length
> 0)
377 match_start
-= match_length
- 1;
382 #if DONT_UNLINK_WHILE_OPEN
384 /* FIXME-someday: remove all of this DONT_UNLINK_WHILE_OPEN junk.
385 Using atexit like this is wrong, since it can fail
386 when called e.g. 32 or more times.
387 But this isn't a big deal, since the code is used only on WOE/DOS
388 systems, and few people invoke tac on that many nonseekable files. */
/* Name and stream of the temporary file that unlink_tempfile cleans up. */
390 static const char *file_to_remove
;
391 static FILE *fp_to_close
;
/* atexit handler: close `fp_to_close', then unlink `file_to_remove'. */
394 unlink_tempfile (void)
396 fclose (fp_to_close
);
397 unlink (file_to_remove
);
/* Variant that defers removal of the temporary file to program exit by
   registering unlink_tempfile with atexit.
   NOTE(review): the statements that record FN and FP are not visible
   in this view. */
401 record_or_unlink_tempfile (char const *fn
, FILE *fp
)
407 atexit (unlink_tempfile
);
/* Second definition of the same name, with FP marked unused.
   NOTE(review): presumably the branch used when DONT_UNLINK_WHILE_OPEN
   is not set; neither the preprocessor branch nor the body is visible
   in this view -- confirm. */
414 record_or_unlink_tempfile (char const *fn
, FILE *fp ATTRIBUTE_UNUSED
)
421 /* Copy from file descriptor INPUT_FD (corresponding to the named FILE) to
422 a temporary file, and set *G_TMP and *G_TEMPFILE to the resulting stream
423 and file name. Return true if successful.
   The temporary is created with mkstemp from "<dir>/tacXXXXXX", where
   <dir> comes from the TMPDIR environment variable or DEFAULT_TMPDIR.
   Data is copied through `G_buffer' in reads of up to `read_size' bytes.
   Failures are reported with error (0, ...), i.e. without exiting. */
426 copy_to_temp (FILE **g_tmp
, char **g_tempfile
, int input_fd
, char const *file
)
/* Both statics persist across calls; the template is built only while
   it is still NULL. */
428 static char *template = NULL
;
429 static char const *tempdir
;
434 if (template == NULL
)
436 char const * const Template
= "%s/tacXXXXXX";
437 tempdir
= getenv ("TMPDIR");
/* Fallback directory (the test that selects it is not visible in
   this view). */
439 tempdir
= DEFAULT_TMPDIR
;
441 /* Subtract 2 for `%s' and add 1 for the trailing NUL byte. */
442 template = xmalloc (strlen (tempdir
) + strlen (Template
) - 2 + 1);
443 sprintf (template, Template
, tempdir
);
446 /* FIXME: there's a small window between a successful mkstemp call
447 and the unlink that's performed by record_or_unlink_tempfile.
448 If we're interrupted in that interval, this code fails to remove
449 the temporary file. On systems that define DONT_UNLINK_WHILE_OPEN,
450 the window is much larger -- it extends to the atexit-called
   unlink_tempfile function.
452 FIXME: clean up upon fatal signal. Don't block them, in case
453 $TMPDIR is a remote file system. */
/* NOTE(review): mkstemp rewrites its argument in place, and `template'
   is static and built only once above.  Verify that a second call
   still passes a string ending in XXXXXX. */
456 fd
= mkstemp (template);
459 error (0, errno
, _("cannot create temporary file %s"), quote (tempfile
));
/* Wrap the descriptor in a read/write stream (binary mode where
   O_BINARY is nonzero). */
463 tmp
= fdopen (fd
, (O_BINARY
? "w+b" : "w+"));
466 error (0, errno
, _("cannot open %s for writing"), quote (tempfile
));
472 record_or_unlink_tempfile (tempfile
, tmp
);
/* Copy step: read up to `read_size' bytes from the input ... */
476 size_t bytes_read
= safe_read (input_fd
, G_buffer
, read_size
);
479 if (bytes_read
== SAFE_READ_ERROR
)
481 error (0, errno
, _("%s: read error"), quotearg_colon (file
));
/* ... and append them to the temporary; a short write is an error. */
485 if (fwrite (G_buffer
, 1, bytes_read
, tmp
) != bytes_read
)
487 error (0, errno
, _("%s: write error"), quotearg_colon (tempfile
));
/* Push buffered data to the file before the caller reads it back
   through the descriptor. */
492 if (fflush (tmp
) != 0)
494 error (0, errno
, _("%s: write error"), quotearg_colon (tempfile
));
499 *g_tempfile
= tempfile
;
507 /* Copy INPUT_FD to a temporary, then tac that file.
508 Return true if successful.
   FILE names the original input and is passed to copy_to_temp. */
511 tac_nonseekable (int input_fd
, const char *file
)
/* && short-circuits, so tac_seekable runs only when the copy succeeded.
   It receives the temporary stream's descriptor and the temporary
   file's name, so its diagnostics name the temporary rather than FILE. */
515 return (copy_to_temp (&tmp_stream
, &tmp_file
, input_fd
, file
)
516 && tac_seekable (fileno (tmp_stream
), tmp_file
));
519 /* Print FILENAME in reverse, copying it to a temporary
520 file first if it is not seekable.
521 Return true if successful.
   A FILENAME of "-" is recognized via `is_stdin'. */
524 tac_file (const char *filename
)
529 bool is_stdin
= STREQ (filename
, "-");
/* Remember that stdin was used; main tests this flag before closing
   STDIN_FILENO. */
533 have_read_stdin
= true;
/* From here on FILENAME is the translated display name used in
   diagnostics. */
535 filename
= _("standard input");
536 if (O_BINARY
&& ! isatty (STDIN_FILENO
))
537 freopen (NULL
, "rb", stdin
);
541 fd
= open (filename
, O_RDONLY
| O_BINARY
);
544 error (0, errno
, _("cannot open %s for reading"), quote (filename
));
/* Seekability probe: try to seek to the end of the input. */
549 file_size
= lseek (fd
, (off_t
) 0, SEEK_END
);
/* A failed seek or a terminal takes the temporary-file path; anything
   else is reversed in place. */
551 ok
= (file_size
< 0 || isatty (fd
)
552 ? tac_nonseekable (fd
, filename
)
553 : tac_seekable (fd
, filename
));
/* Standard input is not closed here; main closes STDIN_FILENO. */
555 if (!is_stdin
&& close (fd
) != 0)
557 error (0, errno
, _("%s: read error"), quotearg_colon (filename
));
/* Program entry point: parse the options, prepare the separator (a
   compiled regular expression or a fixed string), allocate the input
   buffer, run tac_file on each operand (or on "-" when there are none),
   flush the output buffer and exit with EXIT_SUCCESS only if every file
   succeeded.
   NOTE(review): braces, some declarations and the option case labels
   are not visible in this view. */
564 main (int argc
, char **argv
)
566 const char *error_message
; /* Return value from re_compile_pattern. */
569 size_t half_buffer_size
;
571 /* Initializer for `file' if no file-arguments
572 were specified on the command line. */
573 static char const *const default_file_list
[] = {"-", NULL
};
574 char const *const *file
;
576 initialize_main (&argc
, &argv
);
577 program_name
= argv
[0];
578 setlocale (LC_ALL
, "");
579 bindtextdomain (PACKAGE
, LOCALEDIR
);
580 textdomain (PACKAGE
);
582 atexit (close_stdout
);
/* Default: the separator is printed with the record that precedes it. */
586 separator_ends_record
= true;
/* Option loop; "brs:" means -b and -r take no argument and -s takes one. */
588 while ((optc
= getopt_long (argc
, argv
, "brs:", longopts
, NULL
)) != -1)
593 separator_ends_record
= false;
601 error (EXIT_FAILURE
, 0, _("separator cannot be empty"));
603 case_GETOPT_HELP_CHAR
;
604 case_GETOPT_VERSION_CHAR (PROGRAM_NAME
, AUTHORS
);
606 usage (EXIT_FAILURE
);
/* A zero `sentinel_length' means `separator' is matched as a regular
   expression: compile it into `compiled_separator'. */
610 if (sentinel_length
== 0)
612 compiled_separator
.buffer
= NULL
;
613 compiled_separator
.allocated
= 0;
614 compiled_separator
.fastmap
= compiled_separator_fastmap
;
615 compiled_separator
.translate
= NULL
;
616 error_message
= re_compile_pattern (separator
, strlen (separator
),
617 &compiled_separator
);
/* Report the compile result through error (the test that guards this
   call is not visible in this view). */
619 error (EXIT_FAILURE
, 0, "%s", error_message
);
/* Fixed-string case: both lengths are the length of the separator. */
622 match_length
= sentinel_length
= strlen (separator
);
624 read_size
= INITIAL_READSIZE
;
/* Loop while the separator is at least half of `read_size' (the loop
   body is only partly visible in this view). */
625 while (sentinel_length
>= read_size
/ 2)
/* Compare `read_size' against half of SIZE_MAX (the consequence of
   this test is not visible in this view). */
627 if (SIZE_MAX
/ 2 < read_size
)
631 half_buffer_size
= read_size
+ sentinel_length
+ 1;
632 G_buffer_size
= 2 * half_buffer_size
;
/* Wrap-around check: each size must be strictly larger than the one it
   was derived from. */
633 if (! (read_size
< half_buffer_size
&& half_buffer_size
< G_buffer_size
))
635 G_buffer
= xmalloc (G_buffer_size
);
/* Store a copy of the separator at the start of the allocation and
   advance `G_buffer' past it. */
638 strcpy (G_buffer
, separator
);
639 G_buffer
+= sentinel_length
;
/* Operands start at argv[optind]; with none, fall back to the
   one-element list containing "-". */
646 file
= (optind
< argc
647 ? (char const *const *) &argv
[optind
]
648 : default_file_list
);
650 if (O_BINARY
&& ! isatty (STDOUT_FILENO
))
651 freopen (NULL
, "wb", stdout
);
/* Every file is processed even after a failure; `ok' accumulates the
   results. */
656 for (i
= 0; file
[i
]; ++i
)
657 ok
&= tac_file (file
[i
]);
660 /* Flush the output buffer. */
661 output ((char *) NULL
, (char *) NULL
);
663 if (have_read_stdin
&& close (STDIN_FILENO
) < 0)
664 error (EXIT_FAILURE
, errno
, "-");
665 exit (ok
? EXIT_SUCCESS
: EXIT_FAILURE
);