1 /* tac - concatenate and print files in reverse
2 Copyright (C) 88, 89, 90, 91, 95, 96, 1997, 1998 Free Software Foundation, Inc.
4 This program is free software; you can redistribute it and/or modify
5 it under the terms of the GNU General Public License as published by
6 the Free Software Foundation; either version 2, or (at your option)
9 This program is distributed in the hope that it will be useful,
10 but WITHOUT ANY WARRANTY; without even the implied warranty of
11 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 GNU General Public License for more details.
14 You should have received a copy of the GNU General Public License
15 along with this program; if not, write to the Free Software Foundation,
16 Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. */
18 /* Written by Jay Lepreau (lepreau@cs.utah.edu).
19 GNU enhancements by David MacKenzie (djm@gnu.ai.mit.edu). */
21 /* Copy each FILE, or the standard input if none are given or when a
22 FILE name of "-" is encountered, to the standard output with the
23 order of the records reversed. The records are separated by
24 instances of a string, or a newline if none is given. By default, the
25 separator string is attached to the end of the record that it
29 -b, --before The separator is attached to the beginning
30 of the record that it precedes in the file.
31 -r, --regex The separator is a regular expression.
32 -s, --separator=separator Use SEPARATOR as the record separator.
34 To reverse a file byte by byte, use (in bash, ksh, or sh):
42 #include <sys/types.h>
53 #ifndef DEFAULT_TMPDIR
54 # define DEFAULT_TMPDIR "/tmp"
57 /* The number of bytes per atomic read. */
58 #define INITIAL_READSIZE 8192
60 /* The number of bytes per atomic write. */
61 #define WRITESIZE 8192
66 /* The name this program was run with. */
69 /* The string that separates the records of the file. */
70 static char *separator
;
72 /* If nonzero, print `separator' along with the record preceding it
73 in the file; otherwise with the record following it. */
74 static int separator_ends_record
;
76 /* 0 if `separator' is to be matched as a regular expression;
77 otherwise, the length of `separator', used as a sentinel to
79 static int sentinel_length
;
81 /* The length of a match with `separator'. If `sentinel_length' is 0,
82 `match_length' is computed every time a match succeeds;
83 otherwise, it is simply the length of `separator'. */
84 static int match_length
;
86 /* The input buffer. */
87 static char *G_buffer
;
89 /* The number of bytes to read at once into `G_buffer'. */
90 static unsigned read_size
;
92 /* The size of `G_buffer'. This is read_size * 2 + sentinel_length + 2.
93 The extra 2 bytes allow `past_end' to have a value beyond the
94 end of `G_buffer' and `match_start' to run off the front of `G_buffer'. */
95 static unsigned G_buffer_size
;
97 /* The compiled regular expression representing `separator'. */
98 static struct re_pattern_buffer compiled_separator
;
100 /* If nonzero, display usage information and exit. */
101 static int show_help
;
103 /* If nonzero, print the version on standard output then exit. */
104 static int show_version
;
106 static struct option
const longopts
[] =
108 {"before", no_argument
, &separator_ends_record
, 0},
109 {"regex", no_argument
, &sentinel_length
, 0},
110 {"separator", required_argument
, NULL
, 's'},
111 {"help", no_argument
, &show_help
, 1},
112 {"version", no_argument
, &show_version
, 1},
116 /* Read LEN bytes at PTR from descriptor DESC, retrying if interrupted.
117 Return the actual number of bytes read, zero for EOF, or negative
121 safe_read (int desc
, char *ptr
, int len
)
131 n_chars
= read (desc
, ptr
, len
);
133 while (n_chars
< 0 && errno
== EINTR
);
135 n_chars
= read (desc
, ptr
, len
);
145 fprintf (stderr
, _("Try `%s --help' for more information.\n"),
150 Usage: %s [OPTION]... [FILE]...\n\
154 Write each FILE to standard output, last line first.\n\
155 With no FILE, or when FILE is -, read standard input.\n\
157 -b, --before attach the separator before instead of after\n\
158 -r, --regex interpret the separator as a regular expression\n\
159 -s, --separator=STRING use STRING as the separator instead of newline\n\
160 --help display this help and exit\n\
161 --version output version information and exit\n\
163 puts (_("\nReport bugs to <textutils-bugs@gnu.org>."));
165 exit (status
== 0 ? EXIT_SUCCESS
: EXIT_FAILURE
);
168 /* Print the characters from START to PAST_END - 1.
169 If START is NULL, just flush the buffer. */
172 output (const char *start
, const char *past_end
)
174 static char buffer
[WRITESIZE
];
175 static int bytes_in_buffer
= 0;
176 int bytes_to_add
= past_end
- start
;
177 int bytes_available
= WRITESIZE
- bytes_in_buffer
;
181 fwrite (buffer
, 1, bytes_in_buffer
, stdout
);
186 /* Write out as many full buffers as possible. */
187 while (bytes_to_add
>= bytes_available
)
189 memcpy (buffer
+ bytes_in_buffer
, start
, bytes_available
);
190 bytes_to_add
-= bytes_available
;
191 start
+= bytes_available
;
192 fwrite (buffer
, 1, WRITESIZE
, stdout
);
194 bytes_available
= WRITESIZE
;
197 memcpy (buffer
+ bytes_in_buffer
, start
, bytes_to_add
);
198 bytes_in_buffer
+= bytes_to_add
;
201 /* Print in reverse the file named FILE, which is open for reading on stream IN.
202 Return 0 if ok, 1 if an error occurs. */
205 tac_stream (FILE *in
, const char *file
)
207 /* Pointer to the location in `G_buffer' where the search for
208 the next separator will begin. */
211 /* Pointer to one past the rightmost character in `G_buffer' that
212 has not been printed yet. */
215 /* Length of the record growing in `G_buffer'. */
216 unsigned saved_record_size
;
218 /* Offset in the file of the next read. */
221 /* Nonzero if `output' has not been called yet for any file.
222 Only used when the separator is attached to the preceding record. */
224 char first_char
= *separator
; /* Speed optimization, non-regexp. */
225 char *separator1
= separator
+ 1; /* Speed optimization, non-regexp. */
226 int match_length1
= match_length
- 1; /* Speed optimization, non-regexp. */
227 struct re_registers regs
;
229 /* Find the size of the input file. */
230 file_pos
= lseek (fileno (in
), (off_t
) 0, SEEK_END
);
232 return 0; /* It's an empty file. */
234 /* Arrange for the first read to lop off enough to leave the rest of the
235 file a multiple of `read_size'. Since `read_size' can change, this may
236 not always hold during the program run, but since it usually will, leave
237 it here for i/o efficiency (page/sector boundaries and all that).
238 Note: the efficiency gain has not been verified. */
239 saved_record_size
= file_pos
% read_size
;
240 if (saved_record_size
== 0)
241 saved_record_size
= read_size
;
242 file_pos
-= saved_record_size
;
243 /* `file_pos' now points to the start of the last (probably partial) block
244 in the input file. */
246 lseek (fileno (in
), file_pos
, SEEK_SET
);
247 if (safe_read (fileno (in
), G_buffer
, saved_record_size
) != saved_record_size
)
249 error (0, errno
, "%s", file
);
253 match_start
= past_end
= G_buffer
+ saved_record_size
;
254 /* For non-regexp search, move past impossible positions for a match. */
256 match_start
-= match_length1
;
260 /* Search backward from `match_start' - 1 to `G_buffer' for a match
261 with `separator'; for speed, use strncmp if `separator' contains no
263 If the match succeeds, set `match_start' to point to the start of
264 the match and `match_length' to the length of the match.
265 Otherwise, make `match_start' < `G_buffer'. */
266 if (sentinel_length
== 0)
268 int i
= match_start
- G_buffer
;
271 ret
= re_search (&compiled_separator
, G_buffer
, i
, i
- 1, -i
, ®s
);
273 match_start
= G_buffer
- 1;
276 error (EXIT_FAILURE
, 0,
277 _("error in regular expression search"));
281 match_start
= G_buffer
+ regs
.start
[0];
282 match_length
= regs
.end
[0] - regs
.start
[0];
287 /* `match_length' is constant for non-regexp boundaries. */
288 while (*--match_start
!= first_char
289 || (match_length1
&& strncmp (match_start
+ 1, separator1
,
294 /* Check whether we backed off the front of `G_buffer' without finding
295 a match for `separator'. */
296 if (match_start
< G_buffer
)
300 /* Hit the beginning of the file; print the remaining record. */
301 output (G_buffer
, past_end
);
305 saved_record_size
= past_end
- G_buffer
;
306 if (saved_record_size
> read_size
)
308 /* `G_buffer_size' is about twice `read_size', so since
309 we want to read in another `read_size' bytes before
310 the data already in `G_buffer', we need to increase
313 int offset
= sentinel_length
? sentinel_length
: 1;
316 G_buffer_size
= read_size
* 2 + sentinel_length
+ 2;
317 newbuffer
= xrealloc (G_buffer
- offset
, G_buffer_size
);
319 /* Adjust the pointers for the new buffer location. */
320 match_start
+= newbuffer
- G_buffer
;
321 past_end
+= newbuffer
- G_buffer
;
322 G_buffer
= newbuffer
;
325 /* Back up to the start of the next bufferfull of the file. */
326 if (file_pos
>= read_size
)
327 file_pos
-= read_size
;
330 read_size
= file_pos
;
333 lseek (fileno (in
), file_pos
, SEEK_SET
);
335 /* Shift the pending record data right to make room for the new.
336 The source and destination regions probably overlap. */
337 memmove (G_buffer
+ read_size
, G_buffer
, saved_record_size
);
338 past_end
= G_buffer
+ read_size
+ saved_record_size
;
339 /* For non-regexp searches, avoid unnecessary scanning. */
341 match_start
= G_buffer
+ read_size
;
343 match_start
= past_end
;
345 if (safe_read (fileno (in
), G_buffer
, read_size
) != read_size
)
347 error (0, errno
, "%s", file
);
353 /* Found a match of `separator'. */
354 if (separator_ends_record
)
356 char *match_end
= match_start
+ match_length
;
358 /* If this match of `separator' isn't at the end of the
359 file, print the record. */
360 if (first_time
== 0 || match_end
!= past_end
)
361 output (match_end
, past_end
);
362 past_end
= match_end
;
367 output (match_start
, past_end
);
368 past_end
= match_start
;
370 match_start
-= match_length
- 1;
375 /* Print FILE in reverse.
376 Return 0 if ok, 1 if an error occurs. */
379 tac_file (const char *file
)
384 in
= fopen (file
, "r");
387 error (0, errno
, "%s", file
);
390 errors
= tac_stream (in
, file
);
391 if (ferror (in
) || fclose (in
) == EOF
)
393 error (0, errno
, "%s", file
);
399 /* Make a copy of the standard input in a temporary file and store that file's name in *G_TEMPFILE. */
402 save_stdin (FILE **g_tmp
, char **g_tempfile
)
404 static char *template = NULL
;
405 static char *tempdir
;
406 static char *tempfile
;
411 if (template == NULL
)
413 tempdir
= getenv ("TMPDIR");
415 tempdir
= DEFAULT_TMPDIR
;
416 template = xmalloc (strlen (tempdir
) + 11);
418 sprintf (template, "%s/tacXXXXXX", tempdir
);
419 tempfile
= mktemp (template);
421 fd
= creat (tempfile
, 0600);
422 if (fd
== -1 || (tmp
= fdopen (fd
, "rw")) == NULL
)
423 error (EXIT_FAILURE
, errno
, "%s", tempfile
);
424 tmp
= fdopen (fd
, "rw");
426 error (EXIT_FAILURE
, errno
, "%s", tempfile
);
429 while ((bytes_read
= safe_read (0, G_buffer
, read_size
)) > 0)
430 fwrite (G_buffer
, 1, bytes_read
, tmp
);
432 if (ferror (tmp
) || fflush (tmp
) == EOF
)
433 error (EXIT_FAILURE
, errno
, "%s", tempfile
);
435 if (fseek (tmp
, (long int) 0, SEEK_SET
))
436 error (EXIT_FAILURE
, errno
, "%s", tempfile
);
438 if (bytes_read
== -1)
439 error (EXIT_FAILURE
, errno
, _("read error"));
442 *g_tempfile
= tempfile
;
445 /* Print the standard input in reverse, saving it to a temporary
446 file first if it is not a regular file (for example, a pipe).
447 Return 0 if ok, 1 if an error occurs. */
455 /* No tempfile is needed for "tac < file".
456 Use fstat instead of checking for errno == ESPIPE because
457 lseek doesn't work on some special files but doesn't return an
459 if (fstat (0, &stats
))
461 error (0, errno
, _("standard input"));
465 if (S_ISREG (stats
.st_mode
))
467 errors
= tac_stream (stdin
, _("standard input"));
473 save_stdin (&tmp_stream
, &tmp_file
);
474 errors
= tac_stream (tmp_stream
, tmp_file
);
480 /* BUF_END_PLUS_ONE points one byte past the end of the buffer
484 memrchr (const char *buf_start
, const char *buf_end_plus_one
, int c
)
486 const char *p
= buf_end_plus_one
;
487 while (buf_start
<= --p
)
489 if (*(const unsigned char *) p
== c
)
496 tac_mem (const char *buf
, size_t n_bytes
, FILE *out
)
502 const char *nl
= memrchr (buf
, buf
+ n_bytes
, '\n');
503 const char *bol
= (nl
== NULL
? buf
: nl
+ 1);
505 /* If the last line of the input file has no terminating newline,
506 treat it as a special case. */
507 if (bol
< buf
+ n_bytes
)
509 /* Print out the line from bol to end of input. */
510 fwrite (bol
, 1, (buf
+ n_bytes
) - bol
, out
);
512 /* Add a newline here. Otherwise, the first and second lines
513 of output would appear to have been joined. */
517 while ((nl
= memrchr (buf
, bol
- 1, '\n')) != NULL
)
519 /* Output the line (which includes a trailing newline)
520 from NL+1 to BOL-1. */
521 fwrite (nl
+ 1, 1, bol
- (nl
+ 1), out
);
526 /* If there's anything left, output the last line: BUF .. BOL-1.
527 When the first byte of the input is a newline, there is nothing
530 fwrite (buf
, 1, bol
- buf
, out
);
535 tac_stdin_to_mem (void)
538 size_t bufsiz
= 8 * BUFSIZ
;
539 size_t delta
= 8 * BUFSIZ
;
546 buf
= (char *) malloc (bufsiz
);
548 buf
= (char *) realloc (buf
, bufsiz
);
552 /* Free the buffer and fall back on the code that relies on a
558 bytes_read
= safe_read (STDIN_FILENO
, buf
+ n_bytes
, bufsiz
- n_bytes
);
561 n_bytes
+= bytes_read
;
563 error (1, errno
, _("read error"));
568 tac_mem (buf
, n_bytes
, stdout
);
574 main (int argc
, char **argv
)
576 const char *error_message
; /* Return value from re_compile_pattern. */
578 int have_read_stdin
= 0;
580 program_name
= argv
[0];
581 setlocale (LC_ALL
, "");
582 bindtextdomain (PACKAGE
, LOCALEDIR
);
583 textdomain (PACKAGE
);
588 separator_ends_record
= 1;
590 while ((optc
= getopt_long (argc
, argv
, "brs:", longopts
, NULL
)) != -1)
597 separator_ends_record
= 0;
605 error (EXIT_FAILURE
, 0, _("separator cannot be empty"));
614 printf ("tac (%s) %s\n", GNU_PACKAGE
, VERSION
);
621 if (sentinel_length
== 0)
623 compiled_separator
.allocated
= 100;
624 compiled_separator
.buffer
= (unsigned char *)
625 xmalloc (compiled_separator
.allocated
);
626 compiled_separator
.fastmap
= xmalloc (256);
627 compiled_separator
.translate
= 0;
628 error_message
= re_compile_pattern (separator
, strlen (separator
),
629 &compiled_separator
);
631 error (EXIT_FAILURE
, 0, "%s", error_message
);
634 match_length
= sentinel_length
= strlen (separator
);
636 read_size
= INITIAL_READSIZE
;
637 /* A precaution that will probably never be needed. */
638 while (sentinel_length
* 2 >= read_size
)
640 G_buffer_size
= read_size
* 2 + sentinel_length
+ 2;
641 G_buffer
= xmalloc (G_buffer_size
);
644 strcpy (G_buffer
, separator
);
645 G_buffer
+= sentinel_length
;
653 errors
= tac_stdin_to_mem ();
656 for (; optind
< argc
; ++optind
)
658 if (strcmp (argv
[optind
], "-") == 0)
661 errors
|= tac_stdin_to_mem ();
664 errors
|= tac_file (argv
[optind
]);
667 /* Flush the output buffer. */
668 output ((char *) NULL
, (char *) NULL
);
670 if (have_read_stdin
&& close (0) < 0)
671 error (EXIT_FAILURE
, errno
, "-");
672 if (ferror (stdout
) || fclose (stdout
) == EOF
)
673 error (EXIT_FAILURE
, errno
, _("write error"));
674 exit (errors
== 0 ? EXIT_SUCCESS
: EXIT_FAILURE
);