Avoid spurious failure on x86 solaris2.9 when using c89.
[coreutils.git] / src / csplit.c
blob4bc75e283df6aae36d50959c433910966dadb895
1 /* csplit - split a file into sections determined by context lines
2 Copyright (C) 91, 1995-2004 Free Software Foundation, Inc.
4 This program is free software; you can redistribute it and/or modify
5 it under the terms of the GNU General Public License as published by
6 the Free Software Foundation; either version 2, or (at your option)
7 any later version.
9 This program is distributed in the hope that it will be useful,
10 but WITHOUT ANY WARRANTY; without even the implied warranty of
11 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 GNU General Public License for more details.
14 You should have received a copy of the GNU General Public License
15 along with this program; if not, write to the Free Software Foundation,
16 Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. */
18 /* Written by Stuart Kemp, cpsrk@groper.jcu.edu.au.
19 Modified by David MacKenzie, djm@gnu.ai.mit.edu. */
21 #include <config.h>
23 #include <stdio.h>
24 #include <getopt.h>
25 #include <sys/types.h>
26 #include <signal.h>
28 #include "system.h"
30 #include <regex.h>
32 #include "error.h"
33 #include "inttostr.h"
34 #include "safe-read.h"
35 #include "quote.h"
36 #include "xstrtol.h"
38 #ifndef SA_NOCLDSTOP
39 # define sigprocmask(How, Set, Oset) /* empty */
40 # define sigset_t int
41 #endif
43 /* The official name of this program (e.g., no `g' prefix). */
44 #define PROGRAM_NAME "csplit"
46 #define AUTHORS "Stuart Kemp", "David MacKenzie"
48 /* Increment size of area for control records. */
49 #define ALLOC_SIZE 20
51 /* The default prefix for output file names. */
52 #define DEFAULT_PREFIX "xx"
/* One split directive taken from the command line, in parsed form.
   A directive is either a regular expression (REGEXPR is non-NULL)
   or a line count (REGEXPR is NULL and LINES_REQUIRED is used).  */
struct control
{
  /* Source text of the regular expression, or NULL.  */
  char *regexpr;

  /* The same regular expression after compilation.  */
  struct re_pattern_buffer re_compiled;

  /* Signed distance, in lines, from the matching line to the
     place where the input is actually split.  */
  intmax_t offset;

  /* Line count requested by a numeric directive.  */
  uintmax_t lines_required;

  /* How many additional times the directive is applied.  */
  uintmax_t repeat;

  /* Index in ARGV of the argument this record was built from.  */
  int argnum;

  /* True when the repeat count was given as `*'.  */
  bool repeat_forever;

  /* True when the selected section must not be written out
     (only meaningful for regular expressions).  */
  bool ignore;
};
/* Buffer tuning parameters.

   Building with DEBUG defined selects deliberately tiny values so
   that the buffer-growing and record-chaining code paths are
   exercised even by small inputs.  Each macro is defined exactly
   once: giving an object-like macro a second, different definition
   without an intervening #undef is a constraint violation.  */
#ifdef DEBUG
/* Some small values to test the algorithms. */
# define START_SIZE 200
# define INCR_SIZE 10
# define CTRL_SIZE 1
#else
/* Initial size of data area in buffers. */
# define START_SIZE 8191

/* Increment size for data area. */
# define INCR_SIZE 2048

/* Number of lines kept in each node in line list. */
# define CTRL_SIZE 80
#endif
/* A run of bytes described by an explicit length and a pointer to
   its first byte.  */
struct cstring
{
  size_t len;			/* Number of bytes in the run.  */
  char *str;			/* First byte of the run.  */
};
90 /* Pointers to the beginnings of lines in the buffer area.
91 These structures are linked together if needed. */
92 struct line
94 size_t used; /* Number of offsets used in this struct. */
95 size_t insert_index; /* Next offset to use when inserting line. */
96 size_t retrieve_index; /* Next index to use when retrieving line. */
97 struct cstring starts[CTRL_SIZE]; /* Lines in the data area. */
98 struct line *next; /* Next in linked list. */
/* One block of input.  It owns a data area holding the raw bytes
   and a chain of `struct line' tables describing the individual
   lines found in that data area.  Blocks are linked through NEXT.  */
struct buffer_record
{
  /* Capacity of the data area, in bytes.  */
  size_t bytes_alloc;

  /* Number of bytes of the data area that hold input.  */
  size_t bytes_used;

  /* Line number of the first line stored in this block.  */
  uintmax_t start_line;

  /* Line number of the first line not yet retrieved.  */
  uintmax_t first_available;

  /* Count of complete lines stored in this block.  */
  size_t num_lines;

  /* The data area itself.  */
  char *buffer;

  /* First table of line descriptors.  */
  struct line *line_start;

  /* Table of line descriptors currently being filled or read.  */
  struct line *curr_line;

  /* Following block, or NULL.  */
  struct buffer_record *next;
};
117 static void close_output_file (void);
118 static void create_output_file (void);
119 static void delete_all_files (void);
120 static void save_line_to_file (const struct cstring *line);
121 void usage (int status);
123 /* The name this program was run with. */
124 char *program_name;
126 /* Input file descriptor. */
127 static int input_desc = 0;
129 /* Start of buffer list. */
130 static struct buffer_record *head = NULL;
132 /* Partially read line. */
133 static char *hold_area = NULL;
135 /* Number of bytes in `hold_area'. */
136 static size_t hold_count = 0;
138 /* Number of the last line in the buffers. */
139 static uintmax_t last_line_number = 0;
141 /* Number of the line currently being examined. */
142 static uintmax_t current_line = 0;
144 /* If true, we have read EOF. */
145 static bool have_read_eof = false;
147 /* Name of output files. */
148 static char * volatile filename_space = NULL;
150 /* Prefix part of output file names. */
151 static char * volatile prefix = NULL;
153 /* Suffix part of output file names. */
154 static char * volatile suffix = NULL;
156 /* Number of digits to use in output file names. */
157 static int volatile digits = 2;
159 /* Number of files created so far. */
160 static unsigned int volatile files_created = 0;
162 /* Number of bytes written to current file. */
163 static uintmax_t bytes_written;
165 /* Output file pointer. */
166 static FILE *output_stream = NULL;
168 /* Output file name. */
169 static char *output_filename = NULL;
171 /* Perhaps it would be cleaner to pass arg values instead of indexes. */
172 static char **global_argv;
174 /* If true, do not print the count of bytes in each output file. */
175 static bool suppress_count;
177 /* If true, remove output files on error. */
178 static bool volatile remove_files;
180 /* If true, remove all output files which have a zero length. */
181 static bool elide_empty_files;
183 /* The compiled pattern arguments, which determine how to split
184 the input file. */
185 static struct control *controls;
187 /* Number of elements in `controls'. */
188 static size_t control_used;
190 /* The set of signals that are caught. */
191 static sigset_t caught_signals;
193 static struct option const longopts[] =
195 {"digits", required_argument, NULL, 'n'},
196 {"quiet", no_argument, NULL, 'q'},
197 {"silent", no_argument, NULL, 's'},
198 {"keep-files", no_argument, NULL, 'k'},
199 {"elide-empty-files", no_argument, NULL, 'z'},
200 {"prefix", required_argument, NULL, 'f'},
201 {"suffix-format", required_argument, NULL, 'b'},
202 {GETOPT_HELP_OPTION_DECL},
203 {GETOPT_VERSION_OPTION_DECL},
204 {NULL, 0, NULL, 0}
207 /* Optionally remove files created so far; then exit.
208 Called when an error detected. */
210 static void
211 cleanup (void)
213 sigset_t oldset;
215 close_output_file ();
217 sigprocmask (SIG_BLOCK, &caught_signals, &oldset);
218 delete_all_files ();
219 sigprocmask (SIG_SETMASK, &oldset, NULL);
222 static void cleanup_fatal (void) ATTRIBUTE_NORETURN;
223 static void
224 cleanup_fatal (void)
226 cleanup ();
227 exit (EXIT_FAILURE);
/* Report that memory is exhausted, then clean up and exit with a
   failure status.  Not static: it has external linkage.  */

void
xalloc_die (void)
{
  error (0, 0, "%s", _("memory exhausted"));
  cleanup_fatal ();
}
/* Handler for signal SIG: remove the output files (when removal is
   enabled), then restore the default action for SIG and raise it
   again.  Where SA_NOCLDSTOP is not defined, SIG is ignored first
   while the files are being removed.  */

static void
interrupt_handler (int sig)
{
#if !defined SA_NOCLDSTOP
  signal (sig, SIG_IGN);
#endif

  delete_all_files ();

  /* Hand the signal back to its default action.  */
  signal (sig, SIG_DFL);
  raise (sig);
}
250 /* Keep track of NUM bytes of a partial line in buffer START.
251 These bytes will be retrieved later when another large buffer is read.
252 It is not necessary to create a new buffer for these bytes; instead,
253 we keep a pointer to the existing buffer. This buffer *is* on the
254 free list, and when the next buffer is obtained from this list
255 (even if it is this one), these bytes will be placed at the
256 start of the new buffer. */
258 static void
259 save_to_hold_area (char *start, size_t num)
261 hold_area = start;
262 hold_count = num;
265 /* Read up to MAX_N_BYTES bytes from the input stream into DEST.
266 Return the number of bytes read. */
268 static size_t
269 read_input (char *dest, size_t max_n_bytes)
271 size_t bytes_read;
273 if (max_n_bytes == 0)
274 return 0;
276 bytes_read = safe_read (input_desc, dest, max_n_bytes);
278 if (bytes_read == 0)
279 have_read_eof = true;
281 if (bytes_read == SAFE_READ_ERROR)
283 error (0, errno, _("read error"));
284 cleanup_fatal ();
287 return bytes_read;
290 /* Initialize existing line record P. */
292 static void
293 clear_line_control (struct line *p)
295 p->used = 0;
296 p->insert_index = 0;
297 p->retrieve_index = 0;
300 /* Return a new, initialized line record. */
302 static struct line *
303 new_line_control (void)
305 struct line *p = xmalloc (sizeof *p);
307 p->next = NULL;
308 clear_line_control (p);
310 return p;
313 /* Record LINE_START, which is the address of the start of a line
314 of length LINE_LEN in the large buffer, in the lines buffer of B. */
316 static void
317 keep_new_line (struct buffer_record *b, char *line_start, size_t line_len)
319 struct line *l;
321 /* If there is no existing area to keep line info, get some. */
322 if (b->line_start == NULL)
323 b->line_start = b->curr_line = new_line_control ();
325 /* If existing area for lines is full, get more. */
326 if (b->curr_line->used == CTRL_SIZE)
328 b->curr_line->next = new_line_control ();
329 b->curr_line = b->curr_line->next;
332 l = b->curr_line;
334 /* Record the start of the line, and update counters. */
335 l->starts[l->insert_index].str = line_start;
336 l->starts[l->insert_index].len = line_len;
337 l->used++;
338 l->insert_index++;
341 /* Scan the buffer in B for newline characters
342 and record the line start locations and lengths in B.
343 Return the number of lines found in this buffer.
345 There may be an incomplete line at the end of the buffer;
346 a pointer is kept to this area, which will be used when
347 the next buffer is filled. */
349 static size_t
350 record_line_starts (struct buffer_record *b)
352 char *line_start; /* Start of current line. */
353 char *line_end; /* End of each line found. */
354 size_t bytes_left; /* Length of incomplete last line. */
355 size_t lines; /* Number of lines found. */
356 size_t line_length; /* Length of each line found. */
358 if (b->bytes_used == 0)
359 return 0;
361 lines = 0;
362 line_start = b->buffer;
363 bytes_left = b->bytes_used;
365 for (;;)
367 line_end = memchr (line_start, '\n', bytes_left);
368 if (line_end == NULL)
369 break;
370 line_length = line_end - line_start + 1;
371 keep_new_line (b, line_start, line_length);
372 bytes_left -= line_length;
373 line_start = line_end + 1;
374 lines++;
377 /* Check for an incomplete last line. */
378 if (bytes_left)
380 if (have_read_eof)
382 keep_new_line (b, line_start, bytes_left);
383 lines++;
385 else
386 save_to_hold_area (line_start, bytes_left);
389 b->num_lines = lines;
390 b->first_available = b->start_line = last_line_number + 1;
391 last_line_number += lines;
393 return lines;
396 /* Return a new buffer with room to store SIZE bytes, plus
397 an extra byte for safety. */
399 static struct buffer_record *
400 create_new_buffer (size_t size)
402 struct buffer_record *new_buffer = xmalloc (sizeof *new_buffer);
404 new_buffer->buffer = xmalloc (size + 1);
406 new_buffer->bytes_alloc = size;
407 new_buffer->line_start = new_buffer->curr_line = NULL;
409 return new_buffer;
412 /* Return a new buffer of at least MINSIZE bytes. If a buffer of at
413 least that size is currently free, use it, otherwise create a new one. */
415 static struct buffer_record *
416 get_new_buffer (size_t min_size)
418 struct buffer_record *new_buffer; /* Buffer to return. */
419 size_t alloc_size; /* Actual size that will be requested. */
421 alloc_size = START_SIZE;
422 if (alloc_size < min_size)
424 size_t s = min_size - alloc_size + INCR_SIZE - 1;
425 alloc_size += s - s % INCR_SIZE;
428 new_buffer = create_new_buffer (alloc_size);
430 new_buffer->num_lines = 0;
431 new_buffer->bytes_used = 0;
432 new_buffer->start_line = new_buffer->first_available = last_line_number + 1;
433 new_buffer->next = NULL;
435 return new_buffer;
438 static void
439 free_buffer (struct buffer_record *buf)
441 free (buf->buffer);
444 /* Append buffer BUF to the linked list of buffers that contain
445 some data yet to be processed. */
447 static void
448 save_buffer (struct buffer_record *buf)
450 struct buffer_record *p;
452 buf->next = NULL;
453 buf->curr_line = buf->line_start;
455 if (head == NULL)
456 head = buf;
457 else
459 for (p = head; p->next; p = p->next)
460 /* Do nothing. */ ;
461 p->next = buf;
465 /* Fill a buffer of input.
467 Set the initial size of the buffer to a default.
468 Fill the buffer (from the hold area and input stream)
469 and find the individual lines.
470 If no lines are found (the buffer is too small to hold the next line),
471 release the current buffer (whose contents would have been put in the
472 hold area) and repeat the process with another large buffer until at least
473 one entire line has been read.
475 Return true if a new buffer was obtained, otherwise false
476 (in which case end-of-file must have been encountered). */
478 static bool
479 load_buffer (void)
481 struct buffer_record *b;
482 size_t bytes_wanted = START_SIZE; /* Minimum buffer size. */
483 size_t bytes_avail; /* Size of new buffer created. */
484 size_t lines_found; /* Number of lines in this new buffer. */
485 char *p; /* Place to load into buffer. */
487 if (have_read_eof)
488 return false;
490 /* We must make the buffer at least as large as the amount of data
491 in the partial line left over from the last call. */
492 if (bytes_wanted < hold_count)
493 bytes_wanted = hold_count;
497 b = get_new_buffer (bytes_wanted);
498 bytes_avail = b->bytes_alloc; /* Size of buffer returned. */
499 p = b->buffer;
501 /* First check the `holding' area for a partial line. */
502 if (hold_count)
504 if (p != hold_area)
505 memcpy (p, hold_area, hold_count);
506 p += hold_count;
507 b->bytes_used += hold_count;
508 bytes_avail -= hold_count;
509 hold_count = 0;
512 b->bytes_used += read_input (p, bytes_avail);
514 lines_found = record_line_starts (b);
515 bytes_wanted = b->bytes_alloc * 2;
516 if (!lines_found)
517 free_buffer (b);
519 while (!lines_found && !have_read_eof);
521 if (lines_found)
522 save_buffer (b);
524 return lines_found != 0;
527 /* Return the line number of the first line that has not yet been retrieved. */
529 static uintmax_t
530 get_first_line_in_buffer (void)
532 if (head == NULL && !load_buffer ())
533 error (EXIT_FAILURE, errno, _("input disappeared"));
535 return head->first_available;
538 /* Return a pointer to the logical first line in the buffer and make the
539 next line the logical first line.
540 Return NULL if there is no more input. */
542 static struct cstring *
543 remove_line (void)
545 /* If non-NULL, this is the buffer for which the previous call
546 returned the final line. So now, presuming that line has been
547 processed, we can free the buffer and reset this pointer. */
548 static struct buffer_record *prev_buf = NULL;
550 struct cstring *line; /* Return value. */
551 struct line *l; /* For convenience. */
553 if (prev_buf)
555 free_buffer (prev_buf);
556 prev_buf = NULL;
559 if (head == NULL && !load_buffer ())
560 return NULL;
562 if (current_line < head->first_available)
563 current_line = head->first_available;
565 ++(head->first_available);
567 l = head->curr_line;
569 line = &l->starts[l->retrieve_index];
571 /* Advance index to next line. */
572 if (++l->retrieve_index == l->used)
574 /* Go on to the next line record. */
575 head->curr_line = l->next;
576 if (head->curr_line == NULL || head->curr_line->used == 0)
578 /* Go on to the next data block.
579 but first record the current one so we can free it
580 once the line we're returning has been processed. */
581 prev_buf = head;
582 head = head->next;
586 return line;
589 /* Search the buffers for line LINENUM, reading more input if necessary.
590 Return a pointer to the line, or NULL if it is not found in the file. */
592 static struct cstring *
593 find_line (uintmax_t linenum)
595 struct buffer_record *b;
597 if (head == NULL && !load_buffer ())
598 return NULL;
600 if (linenum < head->start_line)
601 return NULL;
603 for (b = head;;)
605 if (linenum < b->start_line + b->num_lines)
607 /* The line is in this buffer. */
608 struct line *l;
609 size_t offset; /* How far into the buffer the line is. */
611 l = b->line_start;
612 offset = linenum - b->start_line;
613 /* Find the control record. */
614 while (offset >= CTRL_SIZE)
616 l = l->next;
617 offset -= CTRL_SIZE;
619 return &l->starts[offset];
621 if (b->next == NULL && !load_buffer ())
622 return NULL;
623 b = b->next; /* Try the next data block. */
627 /* Return true if at least one more line is available for input. */
629 static bool
630 no_more_lines (void)
632 return find_line (current_line + 1) == NULL;
635 /* Set the name of the input file to NAME and open it. */
637 static void
638 set_input_file (const char *name)
640 if (STREQ (name, "-"))
641 input_desc = 0;
642 else
644 input_desc = open (name, O_RDONLY);
645 if (input_desc < 0)
646 error (EXIT_FAILURE, errno, "%s", name);
650 /* Write all lines from the beginning of the buffer up to, but
651 not including, line LAST_LINE, to the current output file.
652 If IGNORE is true, do not output lines selected here.
653 ARGNUM is the index in ARGV of the current pattern. */
655 static void
656 write_to_file (uintmax_t last_line, bool ignore, int argnum)
658 struct cstring *line;
659 uintmax_t first_line; /* First available input line. */
660 uintmax_t lines; /* Number of lines to output. */
661 uintmax_t i;
663 first_line = get_first_line_in_buffer ();
665 if (first_line > last_line)
667 error (0, 0, _("%s: line number out of range"), global_argv[argnum]);
668 cleanup_fatal ();
671 lines = last_line - first_line;
673 for (i = 0; i < lines; i++)
675 line = remove_line ();
676 if (line == NULL)
678 error (0, 0, _("%s: line number out of range"), global_argv[argnum]);
679 cleanup_fatal ();
681 if (!ignore)
682 save_line_to_file (line);
/* Retrieve every remaining line of input and write it to the
   current output file.  */

static void
dump_rest_of_file (void)
{
  for (;;)
    {
      struct cstring *line = remove_line ();

      if (line == NULL)
	break;

      save_line_to_file (line);
    }
}
697 /* Handle an attempt to read beyond EOF under the control of record P,
698 on iteration REPETITION if nonzero. */
700 static void handle_line_error (const struct control *, uintmax_t)
701 ATTRIBUTE_NORETURN;
702 static void
703 handle_line_error (const struct control *p, uintmax_t repetition)
705 char buf[INT_BUFSIZE_BOUND (uintmax_t)];
707 fprintf (stderr, _("%s: `%s': line number out of range"),
708 program_name, umaxtostr (p->lines_required, buf));
709 if (repetition)
710 fprintf (stderr, _(" on repetition %s\n"), umaxtostr (repetition, buf));
711 else
712 fprintf (stderr, "\n");
714 cleanup_fatal ();
717 /* Determine the line number that marks the end of this file,
718 then get those lines and save them to the output file.
719 P is the control record.
720 REPETITION is the repetition number. */
722 static void
723 process_line_count (const struct control *p, uintmax_t repetition)
725 uintmax_t linenum;
726 uintmax_t last_line_to_save = p->lines_required * (repetition + 1);
727 struct cstring *line;
729 create_output_file ();
731 linenum = get_first_line_in_buffer ();
733 while (linenum++ < last_line_to_save)
735 line = remove_line ();
736 if (line == NULL)
737 handle_line_error (p, repetition);
738 save_line_to_file (line);
741 close_output_file ();
743 /* Ensure that the line number specified is not 1 greater than
744 the number of lines in the file. */
745 if (no_more_lines ())
746 handle_line_error (p, repetition);
749 static void regexp_error (struct control *, uintmax_t, bool) ATTRIBUTE_NORETURN;
750 static void
751 regexp_error (struct control *p, uintmax_t repetition, bool ignore)
753 fprintf (stderr, _("%s: `%s': match not found"),
754 program_name, global_argv[p->argnum]);
756 if (repetition)
758 char buf[INT_BUFSIZE_BOUND (uintmax_t)];
759 fprintf (stderr, _(" on repetition %s\n"), umaxtostr (repetition, buf));
761 else
762 fprintf (stderr, "\n");
764 if (!ignore)
766 dump_rest_of_file ();
767 close_output_file ();
769 cleanup_fatal ();
772 /* Read the input until a line matches the regexp in P, outputting
773 it unless P->IGNORE is true.
774 REPETITION is this repeat-count; 0 means the first time. */
776 static void
777 process_regexp (struct control *p, uintmax_t repetition)
779 struct cstring *line; /* From input file. */
780 size_t line_len; /* To make "$" in regexps work. */
781 uintmax_t break_line; /* First line number of next file. */
782 bool ignore = p->ignore; /* If true, skip this section. */
783 int ret;
785 if (!ignore)
786 create_output_file ();
788 /* If there is no offset for the regular expression, or
789 it is positive, then it is not necessary to buffer the lines. */
791 if (p->offset >= 0)
793 for (;;)
795 line = find_line (++current_line);
796 if (line == NULL)
798 if (p->repeat_forever)
800 if (!ignore)
802 dump_rest_of_file ();
803 close_output_file ();
805 exit (EXIT_SUCCESS);
807 else
808 regexp_error (p, repetition, ignore);
810 line_len = line->len;
811 if (line->str[line_len - 1] == '\n')
812 line_len--;
813 ret = re_search (&p->re_compiled, line->str, line_len,
814 0, line_len, NULL);
815 if (ret == -2)
817 error (0, 0, _("error in regular expression search"));
818 cleanup_fatal ();
820 if (ret == -1)
822 line = remove_line ();
823 if (!ignore)
824 save_line_to_file (line);
826 else
827 break;
830 else
832 /* Buffer the lines. */
833 for (;;)
835 line = find_line (++current_line);
836 if (line == NULL)
838 if (p->repeat_forever)
840 if (!ignore)
842 dump_rest_of_file ();
843 close_output_file ();
845 exit (EXIT_SUCCESS);
847 else
848 regexp_error (p, repetition, ignore);
850 line_len = line->len;
851 if (line->str[line_len - 1] == '\n')
852 line_len--;
853 ret = re_search (&p->re_compiled, line->str, line_len,
854 0, line_len, NULL);
855 if (ret == -2)
857 error (0, 0, _("error in regular expression search"));
858 cleanup_fatal ();
860 if (ret >= 0)
861 break;
865 /* Account for any offset from this regexp. */
866 break_line = current_line + p->offset;
868 write_to_file (break_line, ignore, p->argnum);
870 if (!ignore)
871 close_output_file ();
873 if (p->offset > 0)
874 current_line = break_line;
877 /* Split the input file according to the control records we have built. */
879 static void
880 split_file (void)
882 size_t i;
884 for (i = 0; i < control_used; i++)
886 uintmax_t j;
887 if (controls[i].regexpr)
889 for (j = 0; (controls[i].repeat_forever
890 || j <= controls[i].repeat); j++)
891 process_regexp (&controls[i], j);
893 else
895 for (j = 0; (controls[i].repeat_forever
896 || j <= controls[i].repeat); j++)
897 process_line_count (&controls[i], j);
901 create_output_file ();
902 dump_rest_of_file ();
903 close_output_file ();
906 /* Return the name of output file number NUM. */
908 static char *
909 make_filename (unsigned int num)
911 strcpy (filename_space, prefix);
912 if (suffix)
913 sprintf (filename_space+strlen(prefix), suffix, num);
914 else
915 sprintf (filename_space+strlen(prefix), "%0*u", digits, num);
916 return filename_space;
919 /* Create the next output file. */
921 static void
922 create_output_file (void)
924 sigset_t oldset;
925 bool fopen_ok;
926 int fopen_errno;
928 output_filename = make_filename (files_created);
930 /* Create the output file in a critical section, to avoid races. */
931 sigprocmask (SIG_BLOCK, &caught_signals, &oldset);
932 output_stream = fopen (output_filename, "w");
933 fopen_ok = (output_stream != NULL);
934 fopen_errno = errno;
935 files_created += fopen_ok;
936 sigprocmask (SIG_SETMASK, &oldset, NULL);
938 if (! fopen_ok)
940 error (0, fopen_errno, "%s", output_filename);
941 cleanup_fatal ();
943 bytes_written = 0;
946 /* If requested, delete all the files we have created. This function
947 must be called only from critical sections. */
949 static void
950 delete_all_files (void)
952 unsigned int i;
954 if (! remove_files)
955 return;
957 for (i = 0; i < files_created; i++)
959 const char *name = make_filename (i);
960 if (unlink (name))
961 error (0, errno, "%s", name);
964 files_created = 0;
967 /* Close the current output file and print the count
968 of characters in this file. */
970 static void
971 close_output_file (void)
973 if (output_stream)
975 if (ferror (output_stream))
977 error (0, 0, _("write error for `%s'"), output_filename);
978 output_stream = NULL;
979 cleanup_fatal ();
981 if (fclose (output_stream) != 0)
983 error (0, errno, "%s", output_filename);
984 output_stream = NULL;
985 cleanup_fatal ();
987 if (bytes_written == 0 && elide_empty_files)
989 sigset_t oldset;
990 bool unlink_ok;
991 int unlink_errno;
993 /* Remove the output file in a critical section, to avoid races. */
994 sigprocmask (SIG_BLOCK, &caught_signals, &oldset);
995 unlink_ok = (unlink (output_filename) == 0);
996 unlink_errno = errno;
997 files_created -= unlink_ok;
998 sigprocmask (SIG_SETMASK, &oldset, NULL);
1000 if (! unlink_ok)
1001 error (0, unlink_errno, "%s", output_filename);
1003 else
1005 if (!suppress_count)
1007 char buf[INT_BUFSIZE_BOUND (uintmax_t)];
1008 fprintf (stdout, "%s\n", umaxtostr (bytes_written, buf));
1011 output_stream = NULL;
1015 /* Save line LINE to the output file and
1016 increment the character count for the current file. */
1018 static void
1019 save_line_to_file (const struct cstring *line)
1021 fwrite (line->str, sizeof (char), line->len, output_stream);
1022 bytes_written += line->len;
1025 /* Return a new, initialized control record. */
1027 static struct control *
1028 new_control_record (void)
1030 static size_t control_allocated = 0; /* Total space allocated. */
1031 struct control *p;
1033 if (control_used == control_allocated)
1034 controls = x2nrealloc (controls, &control_allocated, sizeof *controls);
1035 p = &controls[control_used++];
1036 p->regexpr = NULL;
1037 p->repeat = 0;
1038 p->repeat_forever = false;
1039 p->lines_required = 0;
1040 p->offset = 0;
1041 return p;
1044 /* Check if there is a numeric offset after a regular expression.
1045 STR is the entire command line argument.
1046 P is the control record for this regular expression.
1047 NUM is the numeric part of STR. */
1049 static void
1050 check_for_offset (struct control *p, const char *str, const char *num)
1052 if (xstrtoimax (num, NULL, 10, &p->offset, "") != LONGINT_OK)
1053 error (EXIT_FAILURE, 0, _("%s: integer expected after delimiter"), str);
1056 /* Given that the first character of command line arg STR is '{',
1057 make sure that the rest of the string is a valid repeat count
1058 and store its value in P.
1059 ARGNUM is the ARGV index of STR. */
1061 static void
1062 parse_repeat_count (int argnum, struct control *p, char *str)
1064 uintmax_t val;
1065 char *end;
1067 end = str + strlen (str) - 1;
1068 if (*end != '}')
1069 error (EXIT_FAILURE, 0, _("%s: `}' is required in repeat count"), str);
1070 *end = '\0';
1072 if (str+1 == end-1 && *(str+1) == '*')
1073 p->repeat_forever = true;
1074 else
1076 if (xstrtoumax (str + 1, NULL, 10, &val, "") != LONGINT_OK)
1078 error (EXIT_FAILURE, 0,
1079 _("%s}: integer required between `{' and `}'"),
1080 global_argv[argnum]);
1082 p->repeat = val;
1085 *end = '}';
1088 /* Extract the regular expression from STR and check for a numeric offset.
1089 STR should start with the regexp delimiter character.
1090 Return a new control record for the regular expression.
1091 ARGNUM is the ARGV index of STR.
1092 Unless IGNORE is true, mark these lines for output. */
1094 static struct control *
1095 extract_regexp (int argnum, bool ignore, char *str)
1097 size_t len; /* Number of bytes in this regexp. */
1098 char delim = *str;
1099 char *closing_delim;
1100 struct control *p;
1101 const char *err;
1103 closing_delim = strrchr (str + 1, delim);
1104 if (closing_delim == NULL)
1105 error (EXIT_FAILURE, 0,
1106 _("%s: closing delimiter `%c' missing"), str, delim);
1108 len = closing_delim - str - 1;
1109 p = new_control_record ();
1110 p->argnum = argnum;
1111 p->ignore = ignore;
1113 p->regexpr = xmalloc (len + 1);
1114 strncpy (p->regexpr, str + 1, len);
1115 p->re_compiled.allocated = len * 2;
1116 p->re_compiled.buffer = xmalloc (p->re_compiled.allocated);
1117 p->re_compiled.fastmap = xmalloc (1 << CHAR_BIT);
1118 p->re_compiled.translate = 0;
1119 err = re_compile_pattern (p->regexpr, len, &p->re_compiled);
1120 if (err)
1122 error (0, 0, _("%s: invalid regular expression: %s"), str, err);
1123 cleanup_fatal ();
1126 if (closing_delim[1])
1127 check_for_offset (p, str, closing_delim + 1);
1129 return p;
1132 /* Extract the break patterns from args START through ARGC - 1 of ARGV.
1133 After each pattern, check if the next argument is a repeat count. */
1135 static void
1136 parse_patterns (int argc, int start, char **argv)
1138 int i; /* Index into ARGV. */
1139 struct control *p; /* New control record created. */
1140 uintmax_t val;
1141 static uintmax_t last_val = 0;
1143 for (i = start; i < argc; i++)
1145 if (*argv[i] == '/' || *argv[i] == '%')
1147 p = extract_regexp (i, *argv[i] == '%', argv[i]);
1149 else
1151 p = new_control_record ();
1152 p->argnum = i;
1154 if (xstrtoumax (argv[i], NULL, 10, &val, "") != LONGINT_OK)
1155 error (EXIT_FAILURE, 0, _("%s: invalid pattern"), argv[i]);
1156 if (val == 0)
1157 error (EXIT_FAILURE, 0,
1158 _("%s: line number must be greater than zero"),
1159 argv[i]);
1160 if (val < last_val)
1162 char buf[INT_BUFSIZE_BOUND (uintmax_t)];
1163 error (EXIT_FAILURE, 0,
1164 _("line number `%s' is smaller than preceding line number, %s"),
1165 argv[i], umaxtostr (last_val, buf));
1168 if (val == last_val)
1169 error (0, 0,
1170 _("warning: line number `%s' is the same as preceding line number"),
1171 argv[i]);
1173 last_val = val;
1175 p->lines_required = val;
1178 if (i + 1 < argc && *argv[i + 1] == '{')
1180 /* We have a repeat count. */
1181 i++;
1182 parse_repeat_count (i, p, argv[i]);
/* Advance *FORMAT_PTR past the leading run of the characters `-',
   `+', ` ' and `#', stopping at the first other character or at the
   end of the string.  Return a value with bit 0 set if `+' or ` '
   was seen and bit 1 set if `#' was seen; `-' contributes nothing.  */

static unsigned int
get_format_flags (char **format_ptr)
{
  unsigned int count = 0;

  for (;;)
    {
      char c = **format_ptr;

      if (c == '+' || c == ' ')
	count |= 1;
      else if (c == '#')
	count |= 2;	/* Allow for 0x prefix preceding an `x' conversion.  */
      else if (c != '-')
	return count;

      (*format_ptr)++;
    }
}
1215 static size_t
1216 get_format_width (char **format_ptr)
1218 unsigned long int val = 0;
1220 if (ISDIGIT (**format_ptr)
1221 && (xstrtoul (*format_ptr, format_ptr, 10, &val, NULL) != LONGINT_OK
1222 || SIZE_MAX < val))
1223 error (EXIT_FAILURE, 0, _("invalid format width"));
1225 /* Allow for enough octal digits to represent the value of UINT_MAX,
1226 even if the field width is less than that. */
1227 return MAX (val, (sizeof (unsigned int) * CHAR_BIT + 2) / 3);
1230 static size_t
1231 get_format_prec (char **format_ptr)
1233 if (**format_ptr != '.')
1234 return 0;
1235 (*format_ptr)++;
1237 if (! ISDIGIT (**format_ptr))
1238 return 0;
1239 else
1241 unsigned long int val;
1242 if (xstrtoul (*format_ptr, format_ptr, 10, &val, NULL) != LONGINT_OK
1243 || SIZE_MAX < val)
1244 error (EXIT_FAILURE, 0, _("invalid format precision"));
1245 return val;
/* Take the character at *FORMAT_PTR as a conversion specifier and
   advance *FORMAT_PTR past it.  The specifiers `d', `i', `o', `u',
   `x' and `X' are accepted.  The end of the string and every other
   character are reported and end the program; a character that is
   not printable is shown as an octal escape.  */

static void
get_format_conv_type (char **format_ptr)
{
  unsigned char ch = *(*format_ptr)++;

  if (ch == 0)
    error (EXIT_FAILURE, 0, _("missing conversion specifier in suffix"));

  switch (ch)
    {
    case 0:
    case 'd': case 'i':
    case 'o': case 'u':
    case 'x': case 'X':
      break;

    default:
      if (!ISPRINT (ch))
	error (EXIT_FAILURE, 0,
	       _("invalid conversion specifier in suffix: \\%.3o"), ch);
      else
	error (EXIT_FAILURE, 0,
	       _("invalid conversion specifier in suffix: %c"), ch);
    }
}
/* Check the suffix format FORMAT and return an upper bound on the
   number of bytes it produces.  Ordinary characters and `%%' count
   as one byte each.  The single conversion specification counts as
   the larger of its width and precision, plus the allowance that
   get_format_flags returns for its flags.  A format that has no
   conversion specification, or more than one, is reported and ends
   the program.  */

static size_t
max_out (char *format)
{
  size_t out_count = 0;
  bool percent = false;		/* True once a conversion has been seen.  */

  while (*format)
    {
      size_t width;
      size_t prec;
      char c = *format++;

      if (c != '%')
	{
	  out_count++;
	  continue;
	}

      if (*format == '%')
	{
	  /* A doubled percent sign yields a single byte.  */
	  format++;
	  out_count++;
	  continue;
	}

      if (percent)
	error (EXIT_FAILURE, 0,
	       _("too many %% conversion specifications in suffix"));
      percent = true;

      out_count += get_format_flags (&format);
      width = get_format_width (&format);
      prec = get_format_prec (&format);
      out_count += MAX (width, prec);
      get_format_conv_type (&format);
    }

  if (! percent)
    error (EXIT_FAILURE, 0,
	   _("missing %% conversion specification in suffix"));

  return out_count;
}
1318 main (int argc, char **argv)
1320 int optc;
1321 unsigned long int val;
1323 initialize_main (&argc, &argv);
1324 program_name = argv[0];
1325 setlocale (LC_ALL, "");
1326 bindtextdomain (PACKAGE, LOCALEDIR);
1327 textdomain (PACKAGE);
1329 atexit (close_stdout);
1331 global_argv = argv;
1332 controls = NULL;
1333 control_used = 0;
1334 suppress_count = false;
1335 remove_files = true;
1336 prefix = DEFAULT_PREFIX;
1338 while ((optc = getopt_long (argc, argv, "f:b:kn:sqz", longopts, NULL)) != -1)
1339 switch (optc)
1341 case 'f':
1342 prefix = optarg;
1343 break;
1345 case 'b':
1346 suffix = optarg;
1347 break;
1349 case 'k':
1350 remove_files = false;
1351 break;
1353 case 'n':
1354 if (xstrtoul (optarg, NULL, 10, &val, "") != LONGINT_OK
1355 || val > INT_MAX)
1356 error (EXIT_FAILURE, 0, _("%s: invalid number"), optarg);
1357 digits = val;
1358 break;
1360 case 's':
1361 case 'q':
1362 suppress_count = true;
1363 break;
1365 case 'z':
1366 elide_empty_files = true;
1367 break;
1369 case_GETOPT_HELP_CHAR;
1371 case_GETOPT_VERSION_CHAR (PROGRAM_NAME, AUTHORS);
1373 default:
1374 usage (EXIT_FAILURE);
1377 if (argc - optind < 2)
1379 if (argc <= optind)
1380 error (0, 0, _("missing operand"));
1381 else
1382 error (0, 0, _("missing operand after %s"), quote (argv[argc - 1]));
1383 usage (EXIT_FAILURE);
1386 if (suffix)
1387 filename_space = xmalloc (strlen (prefix) + max_out (suffix) + 2);
1388 else
1389 filename_space = xmalloc (strlen (prefix) + digits + 2);
1391 set_input_file (argv[optind++]);
1393 parse_patterns (argc, optind, argv);
1396 int i;
1397 static int const sig[] = { SIGHUP, SIGINT, SIGQUIT, SIGTERM };
1398 enum { nsigs = sizeof sig / sizeof sig[0] };
1400 #ifdef SA_NOCLDSTOP
1401 struct sigaction act;
1403 sigemptyset (&caught_signals);
1404 for (i = 0; i < nsigs; i++)
1406 sigaction (sig[i], NULL, &act);
1407 if (act.sa_handler != SIG_IGN)
1408 sigaddset (&caught_signals, sig[i]);
1411 act.sa_handler = interrupt_handler;
1412 act.sa_mask = caught_signals;
1413 act.sa_flags = 0;
1415 for (i = 0; i < nsigs; i++)
1416 if (sigismember (&caught_signals, sig[i]))
1417 sigaction (sig[i], &act, NULL);
1418 #else
1419 for (i = 0; i < nsigs; i++)
1420 if (signal (sig[i], SIG_IGN) != SIG_IGN)
1421 signal (sig[i], interrupt_handler);
1422 #endif
1425 split_file ();
1427 if (close (input_desc) < 0)
1429 error (0, errno, _("read error"));
1430 cleanup_fatal ();
1433 exit (EXIT_SUCCESS);
/* Print help and exit with STATUS.  When STATUS is not EXIT_SUCCESS,
   print only a one-line hint on stderr that points at --help;
   otherwise print the full usage text followed by the bug-report
   address on stdout.  This function does not return.
   NOTE(review): the help text names the first output files xx01 and
   xx02, whereas POSIX csplit names them xx00 and xx01 -- confirm
   against the file-numbering code and correct the message if so.  */
1436 void
1437 usage (int status)
1439 if (status != EXIT_SUCCESS)
1440 fprintf (stderr, _("Try `%s --help' for more information.\n"),
1441 program_name);
1442 else
1444 printf (_("\
1445 Usage: %s [OPTION]... FILE PATTERN...\n\
1447 program_name);
1448 fputs (_("\
1449 Output pieces of FILE separated by PATTERN(s) to files `xx01', `xx02', ...,\n\
1450 and output byte counts of each piece to standard output.\n\
1452 "), stdout);
1453 fputs (_("\
1454 Mandatory arguments to long options are mandatory for short options too.\n\
1455 "), stdout);
1456 fputs (_("\
1457 -b, --suffix-format=FORMAT use sprintf FORMAT instead of %02d\n\
1458 -f, --prefix=PREFIX use PREFIX instead of `xx'\n\
1459 -k, --keep-files do not remove output files on errors\n\
1460 "), stdout);
1461 fputs (_("\
1462 -n, --digits=DIGITS use specified number of digits instead of 2\n\
1463 -s, --quiet, --silent do not print counts of output file sizes\n\
1464 -z, --elide-empty-files remove empty output files\n\
1465 "), stdout);
1466 fputs (HELP_OPTION_DESCRIPTION, stdout);
1467 fputs (VERSION_OPTION_DESCRIPTION, stdout);
1468 fputs (_("\
1470 Read standard input if FILE is -. Each PATTERN may be:\n\
1471 "), stdout);
1472 fputs (_("\
1474 INTEGER copy up to but not including specified line number\n\
1475 /REGEXP/[OFFSET] copy up to but not including a matching line\n\
1476 %REGEXP%[OFFSET] skip to, but not including a matching line\n\
1477 {INTEGER} repeat the previous pattern specified number of times\n\
1478 {*} repeat the previous pattern as many times as possible\n\
1480 A line OFFSET is a required `+' or `-' followed by a positive integer.\n\
1481 "), stdout);
1482 printf (_("\nReport bugs to <%s>.\n"), PACKAGE_BUGREPORT);
1484 exit (status);