Port parts of the code to C89 to minimize the need for c99-to-c89.diff,
[coreutils.git] / src / csplit.c
blob382fd6621476f5c39bba8b864ec08db46ae44b85
1 /* csplit - split a file into sections determined by context lines
2 Copyright (C) 91, 1995-2006 Free Software Foundation, Inc.
4 This program is free software; you can redistribute it and/or modify
5 it under the terms of the GNU General Public License as published by
6 the Free Software Foundation; either version 2, or (at your option)
7 any later version.
9 This program is distributed in the hope that it will be useful,
10 but WITHOUT ANY WARRANTY; without even the implied warranty of
11 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 GNU General Public License for more details.
14 You should have received a copy of the GNU General Public License
15 along with this program; if not, write to the Free Software Foundation,
16 Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. */
18 /* Written by Stuart Kemp, cpsrk@groper.jcu.edu.au.
19 Modified by David MacKenzie, djm@gnu.ai.mit.edu. */
21 #include <config.h>
23 #include <getopt.h>
24 #include <sys/types.h>
25 #include <signal.h>
27 #include "system.h"
29 #include <regex.h>
31 #include "error.h"
32 #include "fd-reopen.h"
33 #include "inttostr.h"
34 #include "quote.h"
35 #include "safe-read.h"
36 #include "stdio--.h"
37 #include "xstrtol.h"
/* SA_NOCLDSTOP serves as a stand-in test for the sigaction machinery.
   When it is not defined, define it to 0 and turn the signal-mask
   facilities used in this file into no-ops.  */
#ifndef SA_NOCLDSTOP
# define SA_NOCLDSTOP 0
# define sigprocmask(How, Set, Oset) /* empty */
# define sigset_t int
# if ! HAVE_SIGINTERRUPT
#  define siginterrupt(sig, flag) /* empty */
# endif
#endif
/* Canonical name of this program, i.e. without any `g' prefix.  */
#define PROGRAM_NAME "csplit"

/* Author list, as a comma-separated sequence of string literals.  */
#define AUTHORS "Stuart Kemp", "David MacKenzie"

/* Increment size of the area for control records.  */
#define ALLOC_SIZE 20

/* Prefix given to output file names unless one is supplied.  */
#define DEFAULT_PREFIX "xx"
/* One pattern argument from the command line, in compiled form.  */
struct control
{
  /* Offset from the matching line at which to split.  */
  intmax_t offset;

  /* Number of lines required (line-count patterns).  */
  uintmax_t lines_required;

  /* Repeat count.  */
  uintmax_t repeat;

  /* Index in ARGV of the argument this record came from.  */
  int argnum;

  /* True if `*' was given as the repeat count.  */
  bool repeat_forever;

  /* If true, produce no output for this section (regexp only).  */
  bool ignore;

  /* True if the argument was a regular expression.  */
  bool regexpr;

  /* The compiled regular expression.  */
  struct re_pattern_buffer re_compiled;
};
/* Sizing of the input buffers.

   START_SIZE is the initial size of the data area in a buffer,
   INCR_SIZE the granularity by which a larger data area is sized, and
   CTRL_SIZE the number of lines kept in each node of the line list.

   Building with DEBUG defined selects some small values to test the
   algorithms.  The two sets are kept in separate branches so that no
   macro is ever defined twice with different values.  */
#ifdef DEBUG
# define START_SIZE 200
# define INCR_SIZE 10
# define CTRL_SIZE 1
#else
# define START_SIZE 8191
# define INCR_SIZE 2048
# define CTRL_SIZE 80
#endif
/* A counted string: LEN bytes starting at STR.  */
struct cstring
{
  size_t len;			/* Number of bytes.  */
  char *str;			/* First byte.  */
};
97 /* Pointers to the beginnings of lines in the buffer area.
98 These structures are linked together if needed. */
99 struct line
101 size_t used; /* Number of offsets used in this struct. */
102 size_t insert_index; /* Next offset to use when inserting line. */
103 size_t retrieve_index; /* Next index to use when retrieving line. */
104 struct cstring starts[CTRL_SIZE]; /* Lines in the data area. */
105 struct line *next; /* Next in linked list. */
/* A block of input lines: a data area plus a list of records that
   point at the individual lines inside it.  */
struct buffer_record
{
  /* Size of the data area.  */
  size_t bytes_alloc;

  /* Bytes of the data area in use.  */
  size_t bytes_used;

  /* Number of the first line in this buffer.  */
  uintmax_t start_line;

  /* Number of the first line that can still be retrieved.  */
  uintmax_t first_available;

  /* Number of complete lines in this buffer.  */
  size_t num_lines;

  /* The data area.  */
  char *buffer;

  /* Head of the list of line records.  */
  struct line *line_start;

  /* The line record currently in use.  */
  struct line *curr_line;

  /* Next buffer in the list.  */
  struct buffer_record *next;
};
/* Functions that are used before their definitions appear.  */
static void close_output_file (void);
static void create_output_file (void);
static void delete_all_files (bool in_signal_handler);
static void save_line_to_file (const struct cstring *line);
void usage (int status);
130 /* The name this program was run with. */
131 char *program_name;
133 /* Start of buffer list. */
134 static struct buffer_record *head = NULL;
136 /* Partially read line. */
137 static char *hold_area = NULL;
139 /* Number of bytes in `hold_area'. */
140 static size_t hold_count = 0;
142 /* Number of the last line in the buffers. */
143 static uintmax_t last_line_number = 0;
145 /* Number of the line currently being examined. */
146 static uintmax_t current_line = 0;
148 /* If true, we have read EOF. */
149 static bool have_read_eof = false;
151 /* Name of output files. */
152 static char *volatile filename_space = NULL;
154 /* Prefix part of output file names. */
155 static char const *volatile prefix = NULL;
157 /* Suffix part of output file names. */
158 static char *volatile suffix = NULL;
160 /* Number of digits to use in output file names. */
161 static int volatile digits = 2;
163 /* Number of files created so far. */
164 static unsigned int volatile files_created = 0;
166 /* Number of bytes written to current file. */
167 static uintmax_t bytes_written;
169 /* Output file pointer. */
170 static FILE *output_stream = NULL;
172 /* Output file name. */
173 static char *output_filename = NULL;
175 /* Perhaps it would be cleaner to pass arg values instead of indexes. */
176 static char **global_argv;
178 /* If true, do not print the count of bytes in each output file. */
179 static bool suppress_count;
181 /* If true, remove output files on error. */
182 static bool volatile remove_files;
184 /* If true, remove all output files which have a zero length. */
185 static bool elide_empty_files;
187 /* The compiled pattern arguments, which determine how to split
188 the input file. */
189 static struct control *controls;
191 /* Number of elements in `controls'. */
192 static size_t control_used;
194 /* The set of signals that are caught. */
195 static sigset_t caught_signals;
197 static struct option const longopts[] =
199 {"digits", required_argument, NULL, 'n'},
200 {"quiet", no_argument, NULL, 'q'},
201 {"silent", no_argument, NULL, 's'},
202 {"keep-files", no_argument, NULL, 'k'},
203 {"elide-empty-files", no_argument, NULL, 'z'},
204 {"prefix", required_argument, NULL, 'f'},
205 {"suffix-format", required_argument, NULL, 'b'},
206 {GETOPT_HELP_OPTION_DECL},
207 {GETOPT_VERSION_OPTION_DECL},
208 {NULL, 0, NULL, 0}
211 /* Optionally remove files created so far; then exit.
212 Called when an error detected. */
214 static void
215 cleanup (void)
217 sigset_t oldset;
219 close_output_file ();
221 sigprocmask (SIG_BLOCK, &caught_signals, &oldset);
222 delete_all_files (false);
223 sigprocmask (SIG_SETMASK, &oldset, NULL);
226 static void cleanup_fatal (void) ATTRIBUTE_NORETURN;
227 static void
228 cleanup_fatal (void)
230 cleanup ();
231 exit (EXIT_FAILURE);
/* Report memory exhaustion, then clean up and exit with a failure
   status.  */

extern void
xalloc_die (void)
{
  error (0, 0, "%s", _("memory exhausted"));
  cleanup_fatal ();
}
241 static void
242 interrupt_handler (int sig)
244 if (! SA_NOCLDSTOP)
245 signal (sig, SIG_IGN);
247 delete_all_files (true);
249 signal (sig, SIG_DFL);
250 raise (sig);
253 /* Keep track of NUM bytes of a partial line in buffer START.
254 These bytes will be retrieved later when another large buffer is read. */
256 static void
257 save_to_hold_area (char *start, size_t num)
259 free (hold_area);
260 hold_area = start;
261 hold_count = num;
264 /* Read up to MAX_N_BYTES bytes from the input stream into DEST.
265 Return the number of bytes read. */
267 static size_t
268 read_input (char *dest, size_t max_n_bytes)
270 size_t bytes_read;
272 if (max_n_bytes == 0)
273 return 0;
275 bytes_read = safe_read (STDIN_FILENO, dest, max_n_bytes);
277 if (bytes_read == 0)
278 have_read_eof = true;
280 if (bytes_read == SAFE_READ_ERROR)
282 error (0, errno, _("read error"));
283 cleanup_fatal ();
286 return bytes_read;
289 /* Initialize existing line record P. */
291 static void
292 clear_line_control (struct line *p)
294 p->used = 0;
295 p->insert_index = 0;
296 p->retrieve_index = 0;
299 /* Return a new, initialized line record. */
301 static struct line *
302 new_line_control (void)
304 struct line *p = xmalloc (sizeof *p);
306 p->next = NULL;
307 clear_line_control (p);
309 return p;
312 /* Record LINE_START, which is the address of the start of a line
313 of length LINE_LEN in the large buffer, in the lines buffer of B. */
315 static void
316 keep_new_line (struct buffer_record *b, char *line_start, size_t line_len)
318 struct line *l;
320 /* If there is no existing area to keep line info, get some. */
321 if (b->line_start == NULL)
322 b->line_start = b->curr_line = new_line_control ();
324 /* If existing area for lines is full, get more. */
325 if (b->curr_line->used == CTRL_SIZE)
327 b->curr_line->next = new_line_control ();
328 b->curr_line = b->curr_line->next;
331 l = b->curr_line;
333 /* Record the start of the line, and update counters. */
334 l->starts[l->insert_index].str = line_start;
335 l->starts[l->insert_index].len = line_len;
336 l->used++;
337 l->insert_index++;
340 /* Scan the buffer in B for newline characters
341 and record the line start locations and lengths in B.
342 Return the number of lines found in this buffer.
344 There may be an incomplete line at the end of the buffer;
345 a pointer is kept to this area, which will be used when
346 the next buffer is filled. */
348 static size_t
349 record_line_starts (struct buffer_record *b)
351 char *line_start; /* Start of current line. */
352 char *line_end; /* End of each line found. */
353 size_t bytes_left; /* Length of incomplete last line. */
354 size_t lines; /* Number of lines found. */
355 size_t line_length; /* Length of each line found. */
357 if (b->bytes_used == 0)
358 return 0;
360 lines = 0;
361 line_start = b->buffer;
362 bytes_left = b->bytes_used;
364 for (;;)
366 line_end = memchr (line_start, '\n', bytes_left);
367 if (line_end == NULL)
368 break;
369 line_length = line_end - line_start + 1;
370 keep_new_line (b, line_start, line_length);
371 bytes_left -= line_length;
372 line_start = line_end + 1;
373 lines++;
376 /* Check for an incomplete last line. */
377 if (bytes_left)
379 if (have_read_eof)
381 keep_new_line (b, line_start, bytes_left);
382 lines++;
384 else
385 save_to_hold_area (xmemdup (line_start, bytes_left), bytes_left);
388 b->num_lines = lines;
389 b->first_available = b->start_line = last_line_number + 1;
390 last_line_number += lines;
392 return lines;
395 /* Return a new buffer with room to store SIZE bytes, plus
396 an extra byte for safety. */
398 static struct buffer_record *
399 create_new_buffer (size_t size)
401 struct buffer_record *new_buffer = xmalloc (sizeof *new_buffer);
403 new_buffer->buffer = xmalloc (size + 1);
405 new_buffer->bytes_alloc = size;
406 new_buffer->line_start = new_buffer->curr_line = NULL;
408 return new_buffer;
411 /* Return a new buffer of at least MINSIZE bytes. If a buffer of at
412 least that size is currently free, use it, otherwise create a new one. */
414 static struct buffer_record *
415 get_new_buffer (size_t min_size)
417 struct buffer_record *new_buffer; /* Buffer to return. */
418 size_t alloc_size; /* Actual size that will be requested. */
420 alloc_size = START_SIZE;
421 if (alloc_size < min_size)
423 size_t s = min_size - alloc_size + INCR_SIZE - 1;
424 alloc_size += s - s % INCR_SIZE;
427 new_buffer = create_new_buffer (alloc_size);
429 new_buffer->num_lines = 0;
430 new_buffer->bytes_used = 0;
431 new_buffer->start_line = new_buffer->first_available = last_line_number + 1;
432 new_buffer->next = NULL;
434 return new_buffer;
437 static void
438 free_buffer (struct buffer_record *buf)
440 free (buf->buffer);
441 buf->buffer = NULL;
444 /* Append buffer BUF to the linked list of buffers that contain
445 some data yet to be processed. */
447 static void
448 save_buffer (struct buffer_record *buf)
450 struct buffer_record *p;
452 buf->next = NULL;
453 buf->curr_line = buf->line_start;
455 if (head == NULL)
456 head = buf;
457 else
459 for (p = head; p->next; p = p->next)
460 /* Do nothing. */ ;
461 p->next = buf;
465 /* Fill a buffer of input.
467 Set the initial size of the buffer to a default.
468 Fill the buffer (from the hold area and input stream)
469 and find the individual lines.
470 If no lines are found (the buffer is too small to hold the next line),
471 release the current buffer (whose contents would have been put in the
472 hold area) and repeat the process with another large buffer until at least
473 one entire line has been read.
475 Return true if a new buffer was obtained, otherwise false
476 (in which case end-of-file must have been encountered). */
478 static bool
479 load_buffer (void)
481 struct buffer_record *b;
482 size_t bytes_wanted = START_SIZE; /* Minimum buffer size. */
483 size_t bytes_avail; /* Size of new buffer created. */
484 size_t lines_found; /* Number of lines in this new buffer. */
485 char *p; /* Place to load into buffer. */
487 if (have_read_eof)
488 return false;
490 /* We must make the buffer at least as large as the amount of data
491 in the partial line left over from the last call. */
492 if (bytes_wanted < hold_count)
493 bytes_wanted = hold_count;
495 while (1)
497 b = get_new_buffer (bytes_wanted);
498 bytes_avail = b->bytes_alloc; /* Size of buffer returned. */
499 p = b->buffer;
501 /* First check the `holding' area for a partial line. */
502 if (hold_count)
504 memcpy (p, hold_area, hold_count);
505 p += hold_count;
506 b->bytes_used += hold_count;
507 bytes_avail -= hold_count;
508 hold_count = 0;
511 b->bytes_used += read_input (p, bytes_avail);
513 lines_found = record_line_starts (b);
514 if (!lines_found)
515 free_buffer (b);
517 if (lines_found || have_read_eof)
518 break;
520 if (xalloc_oversized (2, b->bytes_alloc))
521 xalloc_die ();
522 bytes_wanted = 2 * b->bytes_alloc;
523 free_buffer (b);
524 free (b);
527 if (lines_found)
528 save_buffer (b);
529 else
530 free (b);
532 return lines_found != 0;
535 /* Return the line number of the first line that has not yet been retrieved. */
537 static uintmax_t
538 get_first_line_in_buffer (void)
540 if (head == NULL && !load_buffer ())
541 error (EXIT_FAILURE, errno, _("input disappeared"));
543 return head->first_available;
546 /* Return a pointer to the logical first line in the buffer and make the
547 next line the logical first line.
548 Return NULL if there is no more input. */
550 static struct cstring *
551 remove_line (void)
553 /* If non-NULL, this is the buffer for which the previous call
554 returned the final line. So now, presuming that line has been
555 processed, we can free the buffer and reset this pointer. */
556 static struct buffer_record *prev_buf = NULL;
558 struct cstring *line; /* Return value. */
559 struct line *l; /* For convenience. */
561 if (prev_buf)
563 free_buffer (prev_buf);
564 prev_buf = NULL;
567 if (head == NULL && !load_buffer ())
568 return NULL;
570 if (current_line < head->first_available)
571 current_line = head->first_available;
573 ++(head->first_available);
575 l = head->curr_line;
577 line = &l->starts[l->retrieve_index];
579 /* Advance index to next line. */
580 if (++l->retrieve_index == l->used)
582 /* Go on to the next line record. */
583 head->curr_line = l->next;
584 if (head->curr_line == NULL || head->curr_line->used == 0)
586 /* Go on to the next data block.
587 but first record the current one so we can free it
588 once the line we're returning has been processed. */
589 prev_buf = head;
590 head = head->next;
594 return line;
597 /* Search the buffers for line LINENUM, reading more input if necessary.
598 Return a pointer to the line, or NULL if it is not found in the file. */
600 static struct cstring *
601 find_line (uintmax_t linenum)
603 struct buffer_record *b;
605 if (head == NULL && !load_buffer ())
606 return NULL;
608 if (linenum < head->start_line)
609 return NULL;
611 for (b = head;;)
613 if (linenum < b->start_line + b->num_lines)
615 /* The line is in this buffer. */
616 struct line *l;
617 size_t offset; /* How far into the buffer the line is. */
619 l = b->line_start;
620 offset = linenum - b->start_line;
621 /* Find the control record. */
622 while (offset >= CTRL_SIZE)
624 l = l->next;
625 offset -= CTRL_SIZE;
627 return &l->starts[offset];
629 if (b->next == NULL && !load_buffer ())
630 return NULL;
631 b = b->next; /* Try the next data block. */
635 /* Return true if at least one more line is available for input. */
637 static bool
638 no_more_lines (void)
640 return find_line (current_line + 1) == NULL;
/* Make the file NAME standard input.  The name "-" leaves standard
   input as it is.  Exit with an error if NAME cannot be opened.  */

static void
set_input_file (const char *name)
{
  if (STREQ (name, "-"))
    return;

  if (fd_reopen (STDIN_FILENO, name, O_RDONLY, 0) < 0)
    error (EXIT_FAILURE, errno, _("cannot open %s for reading"), quote (name));
}
652 /* Write all lines from the beginning of the buffer up to, but
653 not including, line LAST_LINE, to the current output file.
654 If IGNORE is true, do not output lines selected here.
655 ARGNUM is the index in ARGV of the current pattern. */
657 static void
658 write_to_file (uintmax_t last_line, bool ignore, int argnum)
660 struct cstring *line;
661 uintmax_t first_line; /* First available input line. */
662 uintmax_t lines; /* Number of lines to output. */
663 uintmax_t i;
665 first_line = get_first_line_in_buffer ();
667 if (first_line > last_line)
669 error (0, 0, _("%s: line number out of range"), global_argv[argnum]);
670 cleanup_fatal ();
673 lines = last_line - first_line;
675 for (i = 0; i < lines; i++)
677 line = remove_line ();
678 if (line == NULL)
680 error (0, 0, _("%s: line number out of range"), global_argv[argnum]);
681 cleanup_fatal ();
683 if (!ignore)
684 save_line_to_file (line);
/* Write every line that is still left in the input to the current
   output file.  */

static void
dump_rest_of_file (void)
{
  for (;;)
    {
      struct cstring *line = remove_line ();

      if (line == NULL)
	break;
      save_line_to_file (line);
    }
}
699 /* Handle an attempt to read beyond EOF under the control of record P,
700 on iteration REPETITION if nonzero. */
702 static void handle_line_error (const struct control *, uintmax_t)
703 ATTRIBUTE_NORETURN;
704 static void
705 handle_line_error (const struct control *p, uintmax_t repetition)
707 char buf[INT_BUFSIZE_BOUND (uintmax_t)];
709 fprintf (stderr, _("%s: %s: line number out of range"),
710 program_name, quote (umaxtostr (p->lines_required, buf)));
711 if (repetition)
712 fprintf (stderr, _(" on repetition %s\n"), umaxtostr (repetition, buf));
713 else
714 fprintf (stderr, "\n");
716 cleanup_fatal ();
719 /* Determine the line number that marks the end of this file,
720 then get those lines and save them to the output file.
721 P is the control record.
722 REPETITION is the repetition number. */
724 static void
725 process_line_count (const struct control *p, uintmax_t repetition)
727 uintmax_t linenum;
728 uintmax_t last_line_to_save = p->lines_required * (repetition + 1);
729 struct cstring *line;
731 create_output_file ();
733 linenum = get_first_line_in_buffer ();
735 while (linenum++ < last_line_to_save)
737 line = remove_line ();
738 if (line == NULL)
739 handle_line_error (p, repetition);
740 save_line_to_file (line);
743 close_output_file ();
745 /* Ensure that the line number specified is not 1 greater than
746 the number of lines in the file. */
747 if (no_more_lines ())
748 handle_line_error (p, repetition);
751 static void regexp_error (struct control *, uintmax_t, bool) ATTRIBUTE_NORETURN;
752 static void
753 regexp_error (struct control *p, uintmax_t repetition, bool ignore)
755 fprintf (stderr, _("%s: %s: match not found"),
756 program_name, quote (global_argv[p->argnum]));
758 if (repetition)
760 char buf[INT_BUFSIZE_BOUND (uintmax_t)];
761 fprintf (stderr, _(" on repetition %s\n"), umaxtostr (repetition, buf));
763 else
764 fprintf (stderr, "\n");
766 if (!ignore)
768 dump_rest_of_file ();
769 close_output_file ();
771 cleanup_fatal ();
774 /* Read the input until a line matches the regexp in P, outputting
775 it unless P->IGNORE is true.
776 REPETITION is this repeat-count; 0 means the first time. */
778 static void
779 process_regexp (struct control *p, uintmax_t repetition)
781 struct cstring *line; /* From input file. */
782 size_t line_len; /* To make "$" in regexps work. */
783 uintmax_t break_line; /* First line number of next file. */
784 bool ignore = p->ignore; /* If true, skip this section. */
785 regoff_t ret;
787 if (!ignore)
788 create_output_file ();
790 /* If there is no offset for the regular expression, or
791 it is positive, then it is not necessary to buffer the lines. */
793 if (p->offset >= 0)
795 for (;;)
797 line = find_line (++current_line);
798 if (line == NULL)
800 if (p->repeat_forever)
802 if (!ignore)
804 dump_rest_of_file ();
805 close_output_file ();
807 exit (EXIT_SUCCESS);
809 else
810 regexp_error (p, repetition, ignore);
812 line_len = line->len;
813 if (line->str[line_len - 1] == '\n')
814 line_len--;
815 ret = re_search (&p->re_compiled, line->str, line_len,
816 0, line_len, NULL);
817 if (ret == -2)
819 error (0, 0, _("error in regular expression search"));
820 cleanup_fatal ();
822 if (ret == -1)
824 line = remove_line ();
825 if (!ignore)
826 save_line_to_file (line);
828 else
829 break;
832 else
834 /* Buffer the lines. */
835 for (;;)
837 line = find_line (++current_line);
838 if (line == NULL)
840 if (p->repeat_forever)
842 if (!ignore)
844 dump_rest_of_file ();
845 close_output_file ();
847 exit (EXIT_SUCCESS);
849 else
850 regexp_error (p, repetition, ignore);
852 line_len = line->len;
853 if (line->str[line_len - 1] == '\n')
854 line_len--;
855 ret = re_search (&p->re_compiled, line->str, line_len,
856 0, line_len, NULL);
857 if (ret == -2)
859 error (0, 0, _("error in regular expression search"));
860 cleanup_fatal ();
862 if (ret != -1)
863 break;
867 /* Account for any offset from this regexp. */
868 break_line = current_line + p->offset;
870 write_to_file (break_line, ignore, p->argnum);
872 if (!ignore)
873 close_output_file ();
875 if (p->offset > 0)
876 current_line = break_line;
879 /* Split the input file according to the control records we have built. */
881 static void
882 split_file (void)
884 size_t i;
886 for (i = 0; i < control_used; i++)
888 uintmax_t j;
889 if (controls[i].regexpr)
891 for (j = 0; (controls[i].repeat_forever
892 || j <= controls[i].repeat); j++)
893 process_regexp (&controls[i], j);
895 else
897 for (j = 0; (controls[i].repeat_forever
898 || j <= controls[i].repeat); j++)
899 process_line_count (&controls[i], j);
903 create_output_file ();
904 dump_rest_of_file ();
905 close_output_file ();
908 /* Return the name of output file number NUM.
910 This function is called from a signal handler, so it should invoke
911 only reentrant functions that are async-signal-safe. POSIX does
912 not guarantee this for the functions called below, but we don't
913 know of any hosts where this implementation isn't safe. */
915 static char *
916 make_filename (unsigned int num)
918 strcpy (filename_space, prefix);
919 if (suffix)
920 sprintf (filename_space + strlen (prefix), suffix, num);
921 else
922 sprintf (filename_space + strlen (prefix), "%0*u", digits, num);
923 return filename_space;
926 /* Create the next output file. */
928 static void
929 create_output_file (void)
931 sigset_t oldset;
932 bool fopen_ok;
933 int fopen_errno;
935 output_filename = make_filename (files_created);
937 /* Create the output file in a critical section, to avoid races. */
938 sigprocmask (SIG_BLOCK, &caught_signals, &oldset);
939 output_stream = fopen (output_filename, "w");
940 fopen_ok = (output_stream != NULL);
941 fopen_errno = errno;
942 files_created += fopen_ok;
943 sigprocmask (SIG_SETMASK, &oldset, NULL);
945 if (! fopen_ok)
947 error (0, fopen_errno, "%s", output_filename);
948 cleanup_fatal ();
950 bytes_written = 0;
953 /* If requested, delete all the files we have created. This function
954 must be called only from critical sections. */
956 static void
957 delete_all_files (bool in_signal_handler)
959 unsigned int i;
961 if (! remove_files)
962 return;
964 for (i = 0; i < files_created; i++)
966 const char *name = make_filename (i);
967 if (unlink (name) != 0 && !in_signal_handler)
968 error (0, errno, "%s", name);
971 files_created = 0;
974 /* Close the current output file and print the count
975 of characters in this file. */
977 static void
978 close_output_file (void)
980 if (output_stream)
982 if (ferror (output_stream))
984 error (0, 0, _("write error for %s"), quote (output_filename));
985 output_stream = NULL;
986 cleanup_fatal ();
988 if (fclose (output_stream) != 0)
990 error (0, errno, "%s", output_filename);
991 output_stream = NULL;
992 cleanup_fatal ();
994 if (bytes_written == 0 && elide_empty_files)
996 sigset_t oldset;
997 bool unlink_ok;
998 int unlink_errno;
1000 /* Remove the output file in a critical section, to avoid races. */
1001 sigprocmask (SIG_BLOCK, &caught_signals, &oldset);
1002 unlink_ok = (unlink (output_filename) == 0);
1003 unlink_errno = errno;
1004 files_created -= unlink_ok;
1005 sigprocmask (SIG_SETMASK, &oldset, NULL);
1007 if (! unlink_ok)
1008 error (0, unlink_errno, "%s", output_filename);
1010 else
1012 if (!suppress_count)
1014 char buf[INT_BUFSIZE_BOUND (uintmax_t)];
1015 fprintf (stdout, "%s\n", umaxtostr (bytes_written, buf));
1018 output_stream = NULL;
1022 /* Save line LINE to the output file and
1023 increment the character count for the current file. */
1025 static void
1026 save_line_to_file (const struct cstring *line)
1028 fwrite (line->str, sizeof (char), line->len, output_stream);
1029 bytes_written += line->len;
1032 /* Return a new, initialized control record. */
1034 static struct control *
1035 new_control_record (void)
1037 static size_t control_allocated = 0; /* Total space allocated. */
1038 struct control *p;
1040 if (control_used == control_allocated)
1041 controls = X2NREALLOC (controls, &control_allocated);
1042 p = &controls[control_used++];
1043 p->regexpr = false;
1044 p->repeat = 0;
1045 p->repeat_forever = false;
1046 p->lines_required = 0;
1047 p->offset = 0;
1048 return p;
1051 /* Check if there is a numeric offset after a regular expression.
1052 STR is the entire command line argument.
1053 P is the control record for this regular expression.
1054 NUM is the numeric part of STR. */
1056 static void
1057 check_for_offset (struct control *p, const char *str, const char *num)
1059 if (xstrtoimax (num, NULL, 10, &p->offset, "") != LONGINT_OK)
1060 error (EXIT_FAILURE, 0, _("%s: integer expected after delimiter"), str);
1063 /* Given that the first character of command line arg STR is '{',
1064 make sure that the rest of the string is a valid repeat count
1065 and store its value in P.
1066 ARGNUM is the ARGV index of STR. */
1068 static void
1069 parse_repeat_count (int argnum, struct control *p, char *str)
1071 uintmax_t val;
1072 char *end;
1074 end = str + strlen (str) - 1;
1075 if (*end != '}')
1076 error (EXIT_FAILURE, 0, _("%s: `}' is required in repeat count"), str);
1077 *end = '\0';
1079 if (str+1 == end-1 && *(str+1) == '*')
1080 p->repeat_forever = true;
1081 else
1083 if (xstrtoumax (str + 1, NULL, 10, &val, "") != LONGINT_OK)
1085 error (EXIT_FAILURE, 0,
1086 _("%s}: integer required between `{' and `}'"),
1087 global_argv[argnum]);
1089 p->repeat = val;
1092 *end = '}';
1095 /* Extract the regular expression from STR and check for a numeric offset.
1096 STR should start with the regexp delimiter character.
1097 Return a new control record for the regular expression.
1098 ARGNUM is the ARGV index of STR.
1099 Unless IGNORE is true, mark these lines for output. */
1101 static struct control *
1102 extract_regexp (int argnum, bool ignore, char const *str)
1104 size_t len; /* Number of bytes in this regexp. */
1105 char delim = *str;
1106 char const *closing_delim;
1107 struct control *p;
1108 const char *err;
1110 closing_delim = strrchr (str + 1, delim);
1111 if (closing_delim == NULL)
1112 error (EXIT_FAILURE, 0,
1113 _("%s: closing delimiter `%c' missing"), str, delim);
1115 len = closing_delim - str - 1;
1116 p = new_control_record ();
1117 p->argnum = argnum;
1118 p->ignore = ignore;
1120 p->regexpr = true;
1121 p->re_compiled.buffer = NULL;
1122 p->re_compiled.allocated = 0;
1123 p->re_compiled.fastmap = xmalloc (UCHAR_MAX + 1);
1124 p->re_compiled.translate = NULL;
1125 re_syntax_options =
1126 RE_SYNTAX_POSIX_BASIC & ~RE_CONTEXT_INVALID_DUP & ~RE_NO_EMPTY_RANGES;
1127 err = re_compile_pattern (str + 1, len, &p->re_compiled);
1128 if (err)
1130 error (0, 0, _("%s: invalid regular expression: %s"), str, err);
1131 cleanup_fatal ();
1134 if (closing_delim[1])
1135 check_for_offset (p, str, closing_delim + 1);
1137 return p;
1140 /* Extract the break patterns from args START through ARGC - 1 of ARGV.
1141 After each pattern, check if the next argument is a repeat count. */
1143 static void
1144 parse_patterns (int argc, int start, char **argv)
1146 int i; /* Index into ARGV. */
1147 struct control *p; /* New control record created. */
1148 uintmax_t val;
1149 static uintmax_t last_val = 0;
1151 for (i = start; i < argc; i++)
1153 if (*argv[i] == '/' || *argv[i] == '%')
1155 p = extract_regexp (i, *argv[i] == '%', argv[i]);
1157 else
1159 p = new_control_record ();
1160 p->argnum = i;
1162 if (xstrtoumax (argv[i], NULL, 10, &val, "") != LONGINT_OK)
1163 error (EXIT_FAILURE, 0, _("%s: invalid pattern"), argv[i]);
1164 if (val == 0)
1165 error (EXIT_FAILURE, 0,
1166 _("%s: line number must be greater than zero"),
1167 argv[i]);
1168 if (val < last_val)
1170 char buf[INT_BUFSIZE_BOUND (uintmax_t)];
1171 error (EXIT_FAILURE, 0,
1172 _("line number %s is smaller than preceding line number, %s"),
1173 quote (argv[i]), umaxtostr (last_val, buf));
1176 if (val == last_val)
1177 error (0, 0,
1178 _("warning: line number %s is the same as preceding line number"),
1179 quote (argv[i]));
1181 last_val = val;
1183 p->lines_required = val;
1186 if (i + 1 < argc && *argv[i + 1] == '{')
1188 /* We have a repeat count. */
1189 i++;
1190 parse_repeat_count (i, p, argv[i]);
/* Advance *FORMAT_PTR past any of the flag characters '-', '+', ' '
   and '#', leaving it at the first other character (or at the
   terminating NUL).  Return the number of extra output bytes the
   flags seen may cause: 1 for a '+' or ' ', plus 2 for a '#'.  */

static unsigned int
get_format_flags (char **format_ptr)
{
  unsigned int extra = 0;

  for (;;)
    {
      char c = **format_ptr;

      if (c == '+' || c == ' ')
	extra |= 1;
      else if (c == '#')
	extra |= 2;	/* Allow for 0x prefix preceding an `x' conversion.  */
      else if (c != '-')
	return extra;

      (*format_ptr)++;
    }
}
1223 static size_t
1224 get_format_width (char **format_ptr)
1226 unsigned long int val = 0;
1228 if (ISDIGIT (**format_ptr)
1229 && (xstrtoul (*format_ptr, format_ptr, 10, &val, NULL) != LONGINT_OK
1230 || SIZE_MAX < val))
1231 error (EXIT_FAILURE, 0, _("invalid format width"));
1233 /* Allow for enough octal digits to represent the value of UINT_MAX,
1234 even if the field width is less than that. */
1235 return MAX (val, (sizeof (unsigned int) * CHAR_BIT + 2) / 3);
1238 static size_t
1239 get_format_prec (char **format_ptr)
1241 if (**format_ptr != '.')
1242 return 0;
1243 (*format_ptr)++;
1245 if (! ISDIGIT (**format_ptr))
1246 return 0;
1247 else
1249 unsigned long int val;
1250 if (xstrtoul (*format_ptr, format_ptr, 10, &val, NULL) != LONGINT_OK
1251 || SIZE_MAX < val)
1252 error (EXIT_FAILURE, 0, _("invalid format precision"));
1253 return val;
/* Consume the conversion specifier at *FORMAT_PTR, advancing
   *FORMAT_PTR by one character.  Only `d', `i', `o', `u', `x' and `X'
   are accepted; a missing or any other specifier is a fatal
   error.  */

static void
get_format_conv_type (char **format_ptr)
{
  unsigned char ch = *(*format_ptr)++;

  if (ch == 0)
    error (EXIT_FAILURE, 0, _("missing conversion specifier in suffix"));
  else if (ch == 'd' || ch == 'i' || ch == 'o'
	   || ch == 'u' || ch == 'x' || ch == 'X')
    return;
  else if (isprint (ch))
    error (EXIT_FAILURE, 0,
	   _("invalid conversion specifier in suffix: %c"), ch);
  else
    error (EXIT_FAILURE, 0,
	   _("invalid conversion specifier in suffix: \\%.3o"), ch);
}
/* Validate the suffix FORMAT and return an upper bound on the number
   of bytes one conversion through it can produce (not counting the
   terminating NUL).

   Ordinary characters and each `%%' pair count as one byte.  FORMAT
   must contain exactly one real conversion specification; its
   contribution is the flag allowance plus the larger of its width and
   precision.  Exit with a diagnostic if there are zero or several
   conversion specifications, or if one is malformed.  */

static size_t
max_out (char *format)
{
  size_t total = 0;
  bool seen_conversion = false;
  size_t width;
  size_t prec;

  for (;;)
    {
      char c = *format;

      if (c == '\0')
        break;
      format++;

      if (c != '%')
        {
          total++;
          continue;
        }

      if (*format == '%')
        {
          /* `%%' yields a single literal percent sign.  */
          format++;
          total++;
          continue;
        }

      if (seen_conversion)
        error (EXIT_FAILURE, 0,
               _("too many %% conversion specifications in suffix"));
      seen_conversion = true;

      total += get_format_flags (&format);

      /* Width must be parsed before precision: both advance FORMAT.  */
      width = get_format_width (&format);
      prec = get_format_prec (&format);
      if (width > prec)
        total += width;
      else
        total += prec;

      get_format_conv_type (&format);
    }

  if (! seen_conversion)
    error (EXIT_FAILURE, 0,
           _("missing %% conversion specification in suffix"));

  return total;
}
1326 main (int argc, char **argv)
1328 int optc;
1329 unsigned long int val;
1331 initialize_main (&argc, &argv);
1332 program_name = argv[0];
1333 setlocale (LC_ALL, "");
1334 bindtextdomain (PACKAGE, LOCALEDIR);
1335 textdomain (PACKAGE);
1337 atexit (close_stdout);
1339 global_argv = argv;
1340 controls = NULL;
1341 control_used = 0;
1342 suppress_count = false;
1343 remove_files = true;
1344 prefix = DEFAULT_PREFIX;
1346 while ((optc = getopt_long (argc, argv, "f:b:kn:sqz", longopts, NULL)) != -1)
1347 switch (optc)
1349 case 'f':
1350 prefix = optarg;
1351 break;
1353 case 'b':
1354 suffix = optarg;
1355 break;
1357 case 'k':
1358 remove_files = false;
1359 break;
1361 case 'n':
1362 if (xstrtoul (optarg, NULL, 10, &val, "") != LONGINT_OK
1363 || val > INT_MAX)
1364 error (EXIT_FAILURE, 0, _("%s: invalid number"), optarg);
1365 digits = val;
1366 break;
1368 case 's':
1369 case 'q':
1370 suppress_count = true;
1371 break;
1373 case 'z':
1374 elide_empty_files = true;
1375 break;
1377 case_GETOPT_HELP_CHAR;
1379 case_GETOPT_VERSION_CHAR (PROGRAM_NAME, AUTHORS);
1381 default:
1382 usage (EXIT_FAILURE);
1385 if (argc - optind < 2)
1387 if (argc <= optind)
1388 error (0, 0, _("missing operand"));
1389 else
1390 error (0, 0, _("missing operand after %s"), quote (argv[argc - 1]));
1391 usage (EXIT_FAILURE);
1394 if (suffix)
1395 filename_space = xmalloc (strlen (prefix) + max_out (suffix) + 2);
1396 else
1397 filename_space = xmalloc (strlen (prefix) + digits + 2);
1399 set_input_file (argv[optind++]);
1401 parse_patterns (argc, optind, argv);
1404 int i;
1405 static int const sig[] = { SIGHUP, SIGINT, SIGQUIT, SIGTERM };
1406 enum { nsigs = sizeof sig / sizeof sig[0] };
1408 #if SA_NOCLDSTOP
1409 struct sigaction act;
1411 sigemptyset (&caught_signals);
1412 for (i = 0; i < nsigs; i++)
1414 sigaction (sig[i], NULL, &act);
1415 if (act.sa_handler != SIG_IGN)
1416 sigaddset (&caught_signals, sig[i]);
1419 act.sa_handler = interrupt_handler;
1420 act.sa_mask = caught_signals;
1421 act.sa_flags = 0;
1423 for (i = 0; i < nsigs; i++)
1424 if (sigismember (&caught_signals, sig[i]))
1425 sigaction (sig[i], &act, NULL);
1426 #else
1427 for (i = 0; i < nsigs; i++)
1428 if (signal (sig[i], SIG_IGN) != SIG_IGN)
1430 signal (sig[i], interrupt_handler);
1431 siginterrupt (sig[i], 1);
1433 #endif
1436 split_file ();
1438 if (close (STDIN_FILENO) != 0)
1440 error (0, errno, _("read error"));
1441 cleanup_fatal ();
1444 exit (EXIT_SUCCESS);
/* Print a usage message and exit with STATUS.
   If STATUS is not EXIT_SUCCESS, print only a one-line hint on stderr
   that points at --help; otherwise print the full help text on
   stdout.  Does not return.  */
1447 void
1448 usage (int status)
/* Failure: keep it short and send it to stderr.  */
1450 if (status != EXIT_SUCCESS)
1451 fprintf (stderr, _("Try `%s --help' for more information.\n"),
1452 program_name);
1453 else
/* Synopsis line; %s is replaced by program_name.  */
1455 printf (_("\
1456 Usage: %s [OPTION]... FILE PATTERN...\n\
1458 program_name);
1459 fputs (_("\
1460 Output pieces of FILE separated by PATTERN(s) to files `xx00', `xx01', ...,\n\
1461 and output byte counts of each piece to standard output.\n\
1463 "), stdout);
1464 fputs (_("\
1465 Mandatory arguments to long options are mandatory for short options too.\n\
1466 "), stdout);
/* Option summary.  This text goes through fputs, not printf, so the
   `%02d' below is printed literally.  */
1467 fputs (_("\
1468 -b, --suffix-format=FORMAT use sprintf FORMAT instead of %02d\n\
1469 -f, --prefix=PREFIX use PREFIX instead of `xx'\n\
1470 -k, --keep-files do not remove output files on errors\n\
1471 "), stdout);
1472 fputs (_("\
1473 -n, --digits=DIGITS use specified number of digits instead of 2\n\
1474 -s, --quiet, --silent do not print counts of output file sizes\n\
1475 -z, --elide-empty-files remove empty output files\n\
1476 "), stdout);
1477 fputs (HELP_OPTION_DESCRIPTION, stdout);
1478 fputs (VERSION_OPTION_DESCRIPTION, stdout);
1479 fputs (_("\
1481 Read standard input if FILE is -. Each PATTERN may be:\n\
1482 "), stdout);
/* Description of the accepted PATTERN operand forms.  */
1483 fputs (_("\
1485 INTEGER copy up to but not including specified line number\n\
1486 /REGEXP/[OFFSET] copy up to but not including a matching line\n\
1487 %REGEXP%[OFFSET] skip to, but not including a matching line\n\
1488 {INTEGER} repeat the previous pattern specified number of times\n\
1489 {*} repeat the previous pattern as many times as possible\n\
1491 A line OFFSET is a required `+' or `-' followed by a positive integer.\n\
1492 "), stdout);
1493 printf (_("\nReport bugs to <%s>.\n"), PACKAGE_BUGREPORT);
/* Both the short and the long form end the program here.  */
1495 exit (status);