tests: adjust memory limits in head-c.sh
[coreutils.git] / src / csplit.c
blobac92d4c433bbdd5c9812345fb0030f43296cc54c
1 /* csplit - split a file into sections determined by context lines
2 Copyright (C) 1991-2016 Free Software Foundation, Inc.
4 This program is free software: you can redistribute it and/or modify
5 it under the terms of the GNU General Public License as published by
6 the Free Software Foundation, either version 3 of the License, or
7 (at your option) any later version.
9 This program is distributed in the hope that it will be useful,
10 but WITHOUT ANY WARRANTY; without even the implied warranty of
11 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 GNU General Public License for more details.
14 You should have received a copy of the GNU General Public License
15 along with this program. If not, see <http://www.gnu.org/licenses/>. */
17 /* Written by Stuart Kemp, cpsrk@groper.jcu.edu.au.
18 Modified by David MacKenzie, djm@gnu.ai.mit.edu. */
20 #include <config.h>
22 #include <assert.h>
23 #include <getopt.h>
24 #include <sys/types.h>
25 #include <signal.h>
27 #include "system.h"
29 #include <regex.h>
31 #include "error.h"
32 #include "fd-reopen.h"
33 #include "quote.h"
34 #include "safe-read.h"
35 #include "stdio--.h"
36 #include "xdectoint.h"
37 #include "xstrtol.h"
39 /* The official name of this program (e.g., no 'g' prefix). */
40 #define PROGRAM_NAME "csplit"
/* The program's authors, as a comma-separated list in which each name
   is wrapped in proper_name ().  */
42 #define AUTHORS \
43 proper_name ("Stuart Kemp"), \
44 proper_name ("David MacKenzie")
46 /* The default prefix for output file names. */
47 #define DEFAULT_PREFIX "xx"
49 /* A compiled pattern arg.
   One record is filled in per pattern argument: either a line number
   (REGEXPR false, LINES_REQUIRED set) or a regular expression
   (REGEXPR true, RE_COMPILED and optionally OFFSET set).  */
50 struct control
52 intmax_t offset; /* Offset from regexp to split at. */
53 uintmax_t lines_required; /* Number of lines required. */
54 uintmax_t repeat; /* Repeat count. */
55 int argnum; /* ARGV index. */
56 bool repeat_forever; /* True if '*' used as a repeat count. */
57 bool ignore; /* If true, produce no output (for regexp). */
58 bool regexpr; /* True if regular expression was used. */
59 struct re_pattern_buffer re_compiled; /* Compiled regular expression. */
/* Tunable sizes for the input buffering code:
     START_SIZE - initial size of the data area in buffers;
     INCR_SIZE  - increment size for the data area;
     CTRL_SIZE  - number of lines kept in each node of the line list.

   Each macro is defined exactly once.  Defining the production values
   first and then redefining them under DEBUG, without an intervening
   #undef, is a constraint violation that compilers diagnose (fatally
   with -Werror), so the two sets of values live in separate branches.  */
#ifdef DEBUG
/* Some small values to test the algorithms. */
# define START_SIZE 200
# define INCR_SIZE 10
# define CTRL_SIZE 1
#else
# define START_SIZE 8191
# define INCR_SIZE 2048
# define CTRL_SIZE 80
#endif
78 /* A string with a length count.
   Lines recorded by keep_new_line point into a buffer's data area, and
   the code that uses them always goes by LEN.  */
79 struct cstring
81 size_t len;
82 char *str;
85 /* Pointers to the beginnings of lines in the buffer area.
86 These structures are linked together if needed.
   Each node holds up to CTRL_SIZE entries; keep_new_line appends a new
   node once the current one is full.  */
87 struct line
89 size_t used; /* Number of offsets used in this struct. */
90 size_t insert_index; /* Next offset to use when inserting line. */
91 size_t retrieve_index; /* Next index to use when retrieving line. */
92 struct cstring starts[CTRL_SIZE]; /* Lines in the data area. */
93 struct line *next; /* Next in linked list. */
96 /* The structure to hold the input lines.
97 Contains a pointer to the data area and a list containing
98 pointers to the individual lines.
   Records are chained through NEXT; the chain of buffers that still
   hold unprocessed data starts at the file-scope variable 'head'.  */
99 struct buffer_record
101 size_t bytes_alloc; /* Size of the buffer area. */
102 size_t bytes_used; /* Bytes used in the buffer area. */
103 uintmax_t start_line; /* First line number in this buffer. */
104 uintmax_t first_available; /* First line that can be retrieved. */
105 size_t num_lines; /* Number of complete lines in this buffer. */
106 char *buffer; /* Data area. */
107 struct line *line_start; /* Head of list of pointers to lines. */
108 struct line *curr_line; /* The line start record currently in use. */
/* Next buffer in the list, or NULL for the last one.  */
109 struct buffer_record *next;
/* Forward declarations for functions that are called (for example by
   cleanup and the line-processing code) before their definitions
   appear further down in this file.  */
112 static void close_output_file (void);
113 static void create_output_file (void);
114 static void delete_all_files (bool);
115 static void save_line_to_file (const struct cstring *line);
117 /* Start of buffer list. */
118 static struct buffer_record *head = NULL;
120 /* Partially read line. */
121 static char *hold_area = NULL;
123 /* Number of bytes in 'hold_area'. */
124 static size_t hold_count = 0;
126 /* Number of the last line in the buffers. */
127 static uintmax_t last_line_number = 0;
129 /* Number of the line currently being examined. */
130 static uintmax_t current_line = 0;
132 /* If true, we have read EOF. */
133 static bool have_read_eof = false;
/* The objects declared volatile below (filename_space, prefix, suffix,
   digits, files_created and remove_files) are exactly the ones read by
   delete_all_files and make_filename, which interrupt_handler calls;
   presumably that is why they carry the qualifier.  */
135 /* Name of output files. */
136 static char *volatile filename_space = NULL;
138 /* Prefix part of output file names. */
139 static char const *volatile prefix = NULL;
141 /* Suffix part of output file names. */
142 static char *volatile suffix = NULL;
144 /* Number of digits to use in output file names. */
145 static int volatile digits = 2;
147 /* Number of files created so far. */
148 static unsigned int volatile files_created = 0;
150 /* Number of bytes written to current file. */
151 static uintmax_t bytes_written;
153 /* Output file pointer. */
154 static FILE *output_stream = NULL;
156 /* Output file name. */
157 static char *output_filename = NULL;
159 /* Perhaps it would be cleaner to pass arg values instead of indexes. */
160 static char **global_argv;
162 /* If true, do not print the count of bytes in each output file. */
163 static bool suppress_count;
165 /* If true, remove output files on error. */
166 static bool volatile remove_files;
168 /* If true, remove all output files which have a zero length. */
169 static bool elide_empty_files;
171 /* If true, suppress the lines that match the PATTERN. */
172 static bool suppress_matched;
174 /* The compiled pattern arguments, which determine how to split
175 the input file. */
176 static struct control *controls;
178 /* Number of elements in 'controls'. */
179 static size_t control_used;
181 /* The set of signals that are caught. */
182 static sigset_t caught_signals;
184 /* For long options that have no equivalent short option, use a
185 non-character as a pseudo short option, starting with CHAR_MAX + 1.
   SUPPRESS_MATCHED_OPTION is the value used for --suppress-matched in
   the longopts table.  */
186 enum
188 SUPPRESS_MATCHED_OPTION = CHAR_MAX + 1
/* Table of long options.  Each entry gives the option name, whether it
   takes an argument, and the short-option character (or pseudo value)
   it maps to.  The all-zero entry terminates the table.  */
191 static struct option const longopts[] =
193 {"digits", required_argument, NULL, 'n'},
194 {"quiet", no_argument, NULL, 'q'},
195 {"silent", no_argument, NULL, 's'},
196 {"keep-files", no_argument, NULL, 'k'},
197 {"elide-empty-files", no_argument, NULL, 'z'},
198 {"prefix", required_argument, NULL, 'f'},
199 {"suffix-format", required_argument, NULL, 'b'},
200 {"suppress-matched", no_argument, NULL, SUPPRESS_MATCHED_OPTION},
201 {GETOPT_HELP_OPTION_DECL},
202 {GETOPT_VERSION_OPTION_DECL},
203 {NULL, 0, NULL, 0}
206 /* Optionally remove files created so far; then exit.
207 Called when an error detected. */
209 static void
210 cleanup (void)
212 sigset_t oldset;
214 close_output_file ();
216 sigprocmask (SIG_BLOCK, &caught_signals, &oldset);
217 delete_all_files (false);
218 sigprocmask (SIG_SETMASK, &oldset, NULL);
221 static void cleanup_fatal (void) ATTRIBUTE_NORETURN;
222 static void
223 cleanup_fatal (void)
225 cleanup ();
226 exit (EXIT_FAILURE);
/* Report memory exhaustion and leave through the fatal cleanup path,
   rather than exiting without cleaning up.  */
extern void
xalloc_die (void)
{
  error (0, 0, "%s", _("memory exhausted"));
  cleanup_fatal ();
}
/* Signal handler for signal SIG: remove the output files (if removal
   was requested), then let the signal take its default action.  */
static void
interrupt_handler (int sig)
{
  delete_all_files (true);

  /* Restore the default disposition.  The signal is still blocked
     while this handler runs, so re-raising it here only queues it:
     the default action happens once the handler returns and the
     block is lifted.  */
  signal (sig, SIG_DFL);
  raise (sig);
}
247 /* Keep track of NUM bytes of a partial line in buffer START.
248 These bytes will be retrieved later when another large buffer is read. */
250 static void
251 save_to_hold_area (char *start, size_t num)
253 free (hold_area);
254 hold_area = start;
255 hold_count = num;
258 /* Read up to MAX_N_BYTES bytes from the input stream into DEST.
259 Return the number of bytes read. */
261 static size_t
262 read_input (char *dest, size_t max_n_bytes)
264 size_t bytes_read;
266 if (max_n_bytes == 0)
267 return 0;
269 bytes_read = safe_read (STDIN_FILENO, dest, max_n_bytes);
271 if (bytes_read == 0)
272 have_read_eof = true;
274 if (bytes_read == SAFE_READ_ERROR)
276 error (0, errno, _("read error"));
277 cleanup_fatal ();
280 return bytes_read;
283 /* Initialize existing line record P. */
285 static void
286 clear_line_control (struct line *p)
288 p->used = 0;
289 p->insert_index = 0;
290 p->retrieve_index = 0;
293 /* Return a new, initialized line record. */
295 static struct line *
296 new_line_control (void)
298 struct line *p = xmalloc (sizeof *p);
300 p->next = NULL;
301 clear_line_control (p);
303 return p;
306 /* Record LINE_START, which is the address of the start of a line
307 of length LINE_LEN in the large buffer, in the lines buffer of B. */
309 static void
310 keep_new_line (struct buffer_record *b, char *line_start, size_t line_len)
312 struct line *l;
314 /* If there is no existing area to keep line info, get some. */
315 if (b->line_start == NULL)
316 b->line_start = b->curr_line = new_line_control ();
318 /* If existing area for lines is full, get more. */
319 if (b->curr_line->used == CTRL_SIZE)
321 b->curr_line->next = new_line_control ();
322 b->curr_line = b->curr_line->next;
325 l = b->curr_line;
327 /* Record the start of the line, and update counters. */
328 l->starts[l->insert_index].str = line_start;
329 l->starts[l->insert_index].len = line_len;
330 l->used++;
331 l->insert_index++;
334 /* Scan the buffer in B for newline characters
335 and record the line start locations and lengths in B.
336 Return the number of lines found in this buffer.
338 There may be an incomplete line at the end of the buffer;
339 a pointer is kept to this area, which will be used when
340 the next buffer is filled. */
342 static size_t
343 record_line_starts (struct buffer_record *b)
345 char *line_start; /* Start of current line. */
346 char *line_end; /* End of each line found. */
347 size_t bytes_left; /* Length of incomplete last line. */
348 size_t lines; /* Number of lines found. */
349 size_t line_length; /* Length of each line found. */
351 if (b->bytes_used == 0)
352 return 0;
354 lines = 0;
355 line_start = b->buffer;
356 bytes_left = b->bytes_used;
358 while (true)
360 line_end = memchr (line_start, '\n', bytes_left);
361 if (line_end == NULL)
362 break;
363 line_length = line_end - line_start + 1;
364 keep_new_line (b, line_start, line_length);
365 bytes_left -= line_length;
366 line_start = line_end + 1;
367 lines++;
370 /* Check for an incomplete last line. */
371 if (bytes_left)
373 if (have_read_eof)
375 keep_new_line (b, line_start, bytes_left);
376 lines++;
378 else
379 save_to_hold_area (xmemdup (line_start, bytes_left), bytes_left);
382 b->num_lines = lines;
383 b->first_available = b->start_line = last_line_number + 1;
384 last_line_number += lines;
386 return lines;
389 /* Return a new buffer with room to store SIZE bytes, plus
390 an extra byte for safety. */
392 static struct buffer_record *
393 create_new_buffer (size_t size)
395 struct buffer_record *new_buffer = xmalloc (sizeof *new_buffer);
397 new_buffer->buffer = xmalloc (size + 1);
399 new_buffer->bytes_alloc = size;
400 new_buffer->line_start = new_buffer->curr_line = NULL;
402 return new_buffer;
405 /* Return a new buffer of at least MINSIZE bytes. If a buffer of at
406 least that size is currently free, use it, otherwise create a new one. */
408 static struct buffer_record *
409 get_new_buffer (size_t min_size)
411 struct buffer_record *new_buffer; /* Buffer to return. */
412 size_t alloc_size; /* Actual size that will be requested. */
414 alloc_size = START_SIZE;
415 if (alloc_size < min_size)
417 size_t s = min_size - alloc_size + INCR_SIZE - 1;
418 alloc_size += s - s % INCR_SIZE;
421 new_buffer = create_new_buffer (alloc_size);
423 new_buffer->num_lines = 0;
424 new_buffer->bytes_used = 0;
425 new_buffer->start_line = new_buffer->first_available = last_line_number + 1;
426 new_buffer->next = NULL;
428 return new_buffer;
431 static void
432 free_buffer (struct buffer_record *buf)
434 struct line *l;
435 for (l = buf->line_start; l;)
437 struct line *n = l->next;
438 free (l);
439 l = n;
441 buf->line_start = NULL;
442 free (buf->buffer);
443 buf->buffer = NULL;
446 /* Append buffer BUF to the linked list of buffers that contain
447 some data yet to be processed. */
449 static void
450 save_buffer (struct buffer_record *buf)
452 struct buffer_record *p;
454 buf->next = NULL;
455 buf->curr_line = buf->line_start;
457 if (head == NULL)
458 head = buf;
459 else
461 for (p = head; p->next; p = p->next)
462 /* Do nothing. */ ;
463 p->next = buf;
467 /* Fill a buffer of input.
469 Set the initial size of the buffer to a default.
470 Fill the buffer (from the hold area and input stream)
471 and find the individual lines.
472 If no lines are found (the buffer is too small to hold the next line),
473 release the current buffer (whose contents would have been put in the
474 hold area) and repeat the process with another large buffer until at least
475 one entire line has been read.
477 Return true if a new buffer was obtained, otherwise false
478 (in which case end-of-file must have been encountered). */
480 static bool
481 load_buffer (void)
483 struct buffer_record *b;
484 size_t bytes_wanted = START_SIZE; /* Minimum buffer size. */
485 size_t bytes_avail; /* Size of new buffer created. */
486 size_t lines_found; /* Number of lines in this new buffer. */
487 char *p; /* Place to load into buffer. */
/* Once end-of-file has been seen there is nothing more to load.  */
489 if (have_read_eof)
490 return false;
492 /* We must make the buffer at least as large as the amount of data
493 in the partial line left over from the last call. */
494 if (bytes_wanted < hold_count)
495 bytes_wanted = hold_count;
497 while (1)
499 b = get_new_buffer (bytes_wanted);
500 bytes_avail = b->bytes_alloc; /* Size of buffer returned. */
501 p = b->buffer;
503 /* First check the 'holding' area for a partial line. */
504 if (hold_count)
506 memcpy (p, hold_area, hold_count);
507 p += hold_count;
508 b->bytes_used += hold_count;
509 bytes_avail -= hold_count;
/* The held bytes now live in the buffer; only the count is reset here.
   The hold_area storage itself is freed by the next
   save_to_hold_area call.  */
510 hold_count = 0;
/* Fill whatever room is left in the buffer from the input stream.  */
513 b->bytes_used += read_input (p, bytes_avail);
515 lines_found = record_line_starts (b);
/* Stop as soon as at least one line was found or the input ended.  */
517 if (lines_found || have_read_eof)
518 break;
/* No complete line fit: retry with a buffer twice as large, dying if
   the doubled size is rejected by xalloc_oversized.  */
520 if (xalloc_oversized (2, b->bytes_alloc))
521 xalloc_die ();
522 bytes_wanted = 2 * b->bytes_alloc;
523 free_buffer (b);
524 free (b);
/* Keep the buffer only if it contributed lines; otherwise discard it.  */
527 if (lines_found)
528 save_buffer (b);
529 else
531 free_buffer (b);
532 free (b);
535 return lines_found != 0;
538 /* Return the line number of the first line that has not yet been retrieved. */
540 static uintmax_t
541 get_first_line_in_buffer (void)
543 if (head == NULL && !load_buffer ())
544 error (EXIT_FAILURE, errno, _("input disappeared"));
546 return head->first_available;
549 /* Return a pointer to the logical first line in the buffer and make the
550 next line the logical first line.
551 Return NULL if there is no more input.
   The returned pointer refers into the buffer's own line list.  When the
   returned line is the last one of its buffer, that buffer is freed at
   the start of the next call.  */
553 static struct cstring *
554 remove_line (void)
556 /* If non-NULL, this is the buffer for which the previous call
557 returned the final line. So now, presuming that line has been
558 processed, we can free the buffer and reset this pointer. */
559 static struct buffer_record *prev_buf = NULL;
561 struct cstring *line; /* Return value. */
562 struct line *l; /* For convenience. */
564 if (prev_buf)
566 free_buffer (prev_buf);
567 free (prev_buf);
568 prev_buf = NULL;
/* With no buffer queued, try to load one; failure means no more input.  */
571 if (head == NULL && !load_buffer ())
572 return NULL;
/* Keep 'current_line' from falling behind the line being handed out.  */
574 if (current_line < head->first_available)
575 current_line = head->first_available;
577 ++(head->first_available);
579 l = head->curr_line;
581 line = &l->starts[l->retrieve_index];
583 /* Advance index to next line. */
584 if (++l->retrieve_index == l->used)
586 /* Go on to the next line record. */
587 head->curr_line = l->next;
588 if (head->curr_line == NULL || head->curr_line->used == 0)
590 /* Go on to the next data block.
591 but first record the current one so we can free it
592 once the line we're returning has been processed. */
593 prev_buf = head;
594 head = head->next;
598 return line;
601 /* Search the buffers for line LINENUM, reading more input if necessary.
602 Return a pointer to the line, or NULL if it is not found in the file. */
604 static struct cstring *
605 find_line (uintmax_t linenum)
607 struct buffer_record *b;
609 if (head == NULL && !load_buffer ())
610 return NULL;
612 if (linenum < head->start_line)
613 return NULL;
615 for (b = head;;)
617 assert (b);
618 if (linenum < b->start_line + b->num_lines)
620 /* The line is in this buffer. */
621 struct line *l;
622 size_t offset; /* How far into the buffer the line is. */
624 l = b->line_start;
625 offset = linenum - b->start_line;
626 /* Find the control record. */
627 while (offset >= CTRL_SIZE)
629 l = l->next;
630 offset -= CTRL_SIZE;
632 return &l->starts[offset];
634 if (b->next == NULL && !load_buffer ())
635 return NULL;
636 b = b->next; /* Try the next data block. */
640 /* Return true if at least one more line is available for input. */
642 static bool
643 no_more_lines (void)
645 return find_line (current_line + 1) == NULL;
/* Make NAME the program's standard input.  The name "-" keeps the
   existing standard input.  Failure to open NAME is fatal.  */

static void
set_input_file (const char *name)
{
  if (STREQ (name, "-"))
    return;

  if (fd_reopen (STDIN_FILENO, name, O_RDONLY, 0) < 0)
    error (EXIT_FAILURE, errno, _("cannot open %s for reading"),
           quoteaf (name));
}
658 /* Write all lines from the beginning of the buffer up to, but
659 not including, line LAST_LINE, to the current output file.
660 If IGNORE is true, do not output lines selected here.
661 ARGNUM is the index in ARGV of the current pattern. */
663 static void
664 write_to_file (uintmax_t last_line, bool ignore, int argnum)
666 struct cstring *line;
667 uintmax_t first_line; /* First available input line. */
668 uintmax_t lines; /* Number of lines to output. */
669 uintmax_t i;
671 first_line = get_first_line_in_buffer ();
673 if (first_line > last_line)
675 error (0, 0, _("%s: line number out of range"),
676 quote (global_argv[argnum]));
677 cleanup_fatal ();
680 lines = last_line - first_line;
682 for (i = 0; i < lines; i++)
684 line = remove_line ();
685 if (line == NULL)
687 error (0, 0, _("%s: line number out of range"),
688 quote (global_argv[argnum]));
689 cleanup_fatal ();
691 if (!ignore)
692 save_line_to_file (line);
/* Write every remaining input line to the current output file.  */

static void
dump_rest_of_file (void)
{
  for (;;)
    {
      struct cstring *line = remove_line ();
      if (line == NULL)
        break;
      save_line_to_file (line);
    }
}
707 /* Handle an attempt to read beyond EOF under the control of record P,
708 on iteration REPETITION if nonzero. */
710 static void handle_line_error (const struct control *, uintmax_t)
711 ATTRIBUTE_NORETURN;
712 static void
713 handle_line_error (const struct control *p, uintmax_t repetition)
715 char buf[INT_BUFSIZE_BOUND (uintmax_t)];
717 fprintf (stderr, _("%s: %s: line number out of range"),
718 program_name, quote (umaxtostr (p->lines_required, buf)));
719 if (repetition)
720 fprintf (stderr, _(" on repetition %s\n"), umaxtostr (repetition, buf));
721 else
722 fprintf (stderr, "\n");
724 cleanup_fatal ();
727 /* Determine the line number that marks the end of this file,
728 then get those lines and save them to the output file.
729 P is the control record.
730 REPETITION is the repetition number. */
732 static void
733 process_line_count (const struct control *p, uintmax_t repetition)
735 uintmax_t linenum;
/* Lines up to, but not including, this line number are written.
   NOTE(review): the product is computed in uintmax_t with no check for
   wraparound -- confirm that callers cannot reach that range.  */
736 uintmax_t last_line_to_save = p->lines_required * (repetition + 1);
738 create_output_file ();
740 /* Ensure that the line number specified is not 1 greater than
741 the number of lines in the file.
742 When suppressing matched lines, check before the loop. */
743 if (no_more_lines () && suppress_matched)
744 handle_line_error (p, repetition);
/* Copy lines until the first available line number reaches the limit;
   running out of input before then is an error.  */
746 linenum = get_first_line_in_buffer ();
747 while (linenum++ < last_line_to_save)
749 struct cstring *line = remove_line ();
750 if (line == NULL)
751 handle_line_error (p, repetition);
752 save_line_to_file (line);
755 close_output_file ();
/* With suppress_matched set, consume one more line without writing it.  */
757 if (suppress_matched)
758 remove_line ();
760 /* Ensure that the line number specified is not 1 greater than
761 the number of lines in the file. */
762 if (no_more_lines () && !suppress_matched)
763 handle_line_error (p, repetition);
766 static void regexp_error (struct control *, uintmax_t, bool) ATTRIBUTE_NORETURN;
767 static void
768 regexp_error (struct control *p, uintmax_t repetition, bool ignore)
770 fprintf (stderr, _("%s: %s: match not found"),
771 program_name, quote (global_argv[p->argnum]));
773 if (repetition)
775 char buf[INT_BUFSIZE_BOUND (uintmax_t)];
776 fprintf (stderr, _(" on repetition %s\n"), umaxtostr (repetition, buf));
778 else
779 fprintf (stderr, "\n");
781 if (!ignore)
783 dump_rest_of_file ();
784 close_output_file ();
786 cleanup_fatal ();
789 /* Read the input until a line matches the regexp in P, outputting
790 it unless P->IGNORE is true.
791 REPETITION is this repeat-count; 0 means the first time.
   If the input ends before a match: with P->repeat_forever set, the
   rest of the input is written (unless ignored) and the program exits
   successfully; otherwise regexp_error reports the failure and exits.  */
793 static void
794 process_regexp (struct control *p, uintmax_t repetition)
796 struct cstring *line; /* From input file. */
797 size_t line_len; /* To make "$" in regexps work. */
798 uintmax_t break_line; /* First line number of next file. */
799 bool ignore = p->ignore; /* If true, skip this section. */
800 regoff_t ret;
802 if (!ignore)
803 create_output_file ();
/* When suppressing matched lines, consume one line without writing it,
   except before any line has been examined (current_line == 0).  */
805 if (suppress_matched && current_line > 0)
806 remove_line ();
808 /* If there is no offset for the regular expression, or
809 it is positive, then it is not necessary to buffer the lines. */
811 if (p->offset >= 0)
813 while (true)
815 line = find_line (++current_line);
816 if (line == NULL)
818 if (p->repeat_forever)
820 if (!ignore)
822 dump_rest_of_file ();
823 close_output_file ();
825 exit (EXIT_SUCCESS);
827 else
828 regexp_error (p, repetition, ignore);
/* Search the line without its trailing newline, if it has one.  */
830 line_len = line->len;
831 if (line->str[line_len - 1] == '\n')
832 line_len--;
833 ret = re_search (&p->re_compiled, line->str, line_len,
834 0, line_len, NULL);
/* A result of -2 is treated as a failure of the search itself.  */
835 if (ret == -2)
837 error (0, 0, _("error in regular expression search"));
838 cleanup_fatal ();
/* A result of -1 is treated as "no match": consume the line now,
   writing it unless this section is ignored.  */
840 if (ret == -1)
842 line = remove_line ();
843 if (!ignore)
844 save_line_to_file (line);
846 else
847 break;
850 else
852 /* Buffer the lines. */
/* Same search as above, but non-matching lines are left in the buffers
   because a negative offset places the split before the match.  */
853 while (true)
855 line = find_line (++current_line);
856 if (line == NULL)
858 if (p->repeat_forever)
860 if (!ignore)
862 dump_rest_of_file ();
863 close_output_file ();
865 exit (EXIT_SUCCESS);
867 else
868 regexp_error (p, repetition, ignore);
870 line_len = line->len;
871 if (line->str[line_len - 1] == '\n')
872 line_len--;
873 ret = re_search (&p->re_compiled, line->str, line_len,
874 0, line_len, NULL);
875 if (ret == -2)
877 error (0, 0, _("error in regular expression search"));
878 cleanup_fatal ();
879 if (ret != -1)
881 break;
885 /* Account for any offset from this regexp. */
886 break_line = current_line + p->offset;
/* Consume (and, unless ignored, write) everything before break_line.  */
888 write_to_file (break_line, ignore, p->argnum);
890 if (!ignore)
891 close_output_file ();
/* With a positive offset the lines after the match were consumed too,
   so the next search resumes from the break line.  */
893 if (p->offset > 0)
894 current_line = break_line;
897 /* Split the input file according to the control records we have built. */
899 static void
900 split_file (void)
902 size_t i;
904 for (i = 0; i < control_used; i++)
906 uintmax_t j;
907 if (controls[i].regexpr)
909 for (j = 0; (controls[i].repeat_forever
910 || j <= controls[i].repeat); j++)
911 process_regexp (&controls[i], j);
913 else
915 for (j = 0; (controls[i].repeat_forever
916 || j <= controls[i].repeat); j++)
917 process_line_count (&controls[i], j);
921 create_output_file ();
922 dump_rest_of_file ();
923 close_output_file ();
926 /* Return the name of output file number NUM.
928 This function is called from a signal handler, so it should invoke
929 only reentrant functions that are async-signal-safe. POSIX does
930 not guarantee this for the functions called below, but we don't
931 know of any hosts where this implementation isn't safe. */
933 static char *
934 make_filename (unsigned int num)
936 strcpy (filename_space, prefix);
937 if (suffix)
938 sprintf (filename_space + strlen (prefix), suffix, num);
939 else
940 sprintf (filename_space + strlen (prefix), "%0*u", digits, num);
941 return filename_space;
944 /* Create the next output file. */
946 static void
947 create_output_file (void)
949 bool fopen_ok;
950 int fopen_errno;
952 output_filename = make_filename (files_created);
954 if (files_created == UINT_MAX)
956 fopen_ok = false;
957 fopen_errno = EOVERFLOW;
959 else
961 /* Create the output file in a critical section, to avoid races. */
962 sigset_t oldset;
963 sigprocmask (SIG_BLOCK, &caught_signals, &oldset);
964 output_stream = fopen (output_filename, "w");
965 fopen_ok = (output_stream != NULL);
966 fopen_errno = errno;
967 files_created += fopen_ok;
968 sigprocmask (SIG_SETMASK, &oldset, NULL);
971 if (! fopen_ok)
973 error (0, fopen_errno, "%s", quotef (output_filename));
974 cleanup_fatal ();
976 bytes_written = 0;
979 /* If requested, delete all the files we have created. This function
980 must be called only from critical sections. */
982 static void
983 delete_all_files (bool in_signal_handler)
985 unsigned int i;
987 if (! remove_files)
988 return;
990 for (i = 0; i < files_created; i++)
992 const char *name = make_filename (i);
993 if (unlink (name) != 0 && !in_signal_handler)
994 error (0, errno, "%s", quotef (name));
997 files_created = 0;
1000 /* Close the current output file and print the count
1001 of characters in this file. */
1003 static void
1004 close_output_file (void)
1006 if (output_stream)
1008 if (ferror (output_stream))
1010 error (0, 0, _("write error for %s"), quoteaf (output_filename));
1011 output_stream = NULL;
1012 cleanup_fatal ();
1014 if (fclose (output_stream) != 0)
1016 error (0, errno, "%s", quotef (output_filename));
1017 output_stream = NULL;
1018 cleanup_fatal ();
1020 if (bytes_written == 0 && elide_empty_files)
1022 sigset_t oldset;
1023 bool unlink_ok;
1024 int unlink_errno;
1026 /* Remove the output file in a critical section, to avoid races. */
1027 sigprocmask (SIG_BLOCK, &caught_signals, &oldset);
1028 unlink_ok = (unlink (output_filename) == 0);
1029 unlink_errno = errno;
1030 files_created -= unlink_ok;
1031 sigprocmask (SIG_SETMASK, &oldset, NULL);
1033 if (! unlink_ok)
1034 error (0, unlink_errno, "%s", quotef (output_filename));
1036 else
1038 if (!suppress_count)
1040 char buf[INT_BUFSIZE_BOUND (uintmax_t)];
1041 fprintf (stdout, "%s\n", umaxtostr (bytes_written, buf));
1044 output_stream = NULL;
1048 /* Save line LINE to the output file and
1049 increment the character count for the current file. */
1051 static void
1052 save_line_to_file (const struct cstring *line)
1054 size_t l = fwrite (line->str, sizeof (char), line->len, output_stream);
1055 if (l != line->len)
1057 error (0, errno, _("write error for %s"), quoteaf (output_filename));
1058 output_stream = NULL;
1059 cleanup_fatal ();
1061 bytes_written += line->len;
1064 /* Return a new, initialized control record. */
1066 static struct control *
1067 new_control_record (void)
1069 static size_t control_allocated = 0; /* Total space allocated. */
1070 struct control *p;
1072 if (control_used == control_allocated)
1073 controls = X2NREALLOC (controls, &control_allocated);
1074 p = &controls[control_used++];
1075 p->regexpr = false;
1076 p->repeat = 0;
1077 p->repeat_forever = false;
1078 p->lines_required = 0;
1079 p->offset = 0;
1080 return p;
1083 /* Check if there is a numeric offset after a regular expression.
1084 STR is the entire command line argument.
1085 P is the control record for this regular expression.
1086 NUM is the numeric part of STR. */
1088 static void
1089 check_for_offset (struct control *p, const char *str, const char *num)
1091 if (xstrtoimax (num, NULL, 10, &p->offset, "") != LONGINT_OK)
1092 error (EXIT_FAILURE, 0, _("%s: integer expected after delimiter"),
1093 quote (str));
1096 /* Given that the first character of command line arg STR is '{',
1097 make sure that the rest of the string is a valid repeat count
1098 and store its value in P.
1099 ARGNUM is the ARGV index of STR. */
1101 static void
1102 parse_repeat_count (int argnum, struct control *p, char *str)
1104 uintmax_t val;
1105 char *end;
1107 end = str + strlen (str) - 1;
1108 if (*end != '}')
1109 error (EXIT_FAILURE, 0, _("%s: '}' is required in repeat count"),
1110 quote (str));
1111 *end = '\0';
1113 if (str+1 == end-1 && *(str+1) == '*')
1114 p->repeat_forever = true;
1115 else
1117 if (xstrtoumax (str + 1, NULL, 10, &val, "") != LONGINT_OK)
1119 error (EXIT_FAILURE, 0,
1120 _("%s}: integer required between '{' and '}'"),
1121 quote (global_argv[argnum]));
1123 p->repeat = val;
1126 *end = '}';
1129 /* Extract the regular expression from STR and check for a numeric offset.
1130 STR should start with the regexp delimiter character.
1131 Return a new control record for the regular expression.
1132 ARGNUM is the ARGV index of STR.
1133 Unless IGNORE is true, mark these lines for output. */
1135 static struct control *
1136 extract_regexp (int argnum, bool ignore, char const *str)
1138 size_t len; /* Number of bytes in this regexp. */
1139 char delim = *str;
1140 char const *closing_delim;
1141 struct control *p;
1142 const char *err;
1144 closing_delim = strrchr (str + 1, delim);
1145 if (closing_delim == NULL)
1146 error (EXIT_FAILURE, 0,
1147 _("%s: closing delimiter '%c' missing"), str, delim);
1149 len = closing_delim - str - 1;
1150 p = new_control_record ();
1151 p->argnum = argnum;
1152 p->ignore = ignore;
1154 p->regexpr = true;
1155 p->re_compiled.buffer = NULL;
1156 p->re_compiled.allocated = 0;
1157 p->re_compiled.fastmap = xmalloc (UCHAR_MAX + 1);
1158 p->re_compiled.translate = NULL;
1159 re_syntax_options =
1160 RE_SYNTAX_POSIX_BASIC & ~RE_CONTEXT_INVALID_DUP & ~RE_NO_EMPTY_RANGES;
1161 err = re_compile_pattern (str + 1, len, &p->re_compiled);
1162 if (err)
1164 error (0, 0, _("%s: invalid regular expression: %s"), quote (str), err);
1165 cleanup_fatal ();
1168 if (closing_delim[1])
1169 check_for_offset (p, str, closing_delim + 1);
1171 return p;
1174 /* Extract the break patterns from args START through ARGC - 1 of ARGV.
1175 After each pattern, check if the next argument is a repeat count. */
1177 static void
1178 parse_patterns (int argc, int start, char **argv)
1180 int i; /* Index into ARGV. */
1181 struct control *p; /* New control record created. */
1182 uintmax_t val;
1183 static uintmax_t last_val = 0;
1185 for (i = start; i < argc; i++)
1187 if (*argv[i] == '/' || *argv[i] == '%')
1189 p = extract_regexp (i, *argv[i] == '%', argv[i]);
1191 else
1193 p = new_control_record ();
1194 p->argnum = i;
1196 if (xstrtoumax (argv[i], NULL, 10, &val, "") != LONGINT_OK)
1197 error (EXIT_FAILURE, 0, _("%s: invalid pattern"), quote (argv[i]));
1198 if (val == 0)
1199 error (EXIT_FAILURE, 0,
1200 _("%s: line number must be greater than zero"),
1201 argv[i]);
1202 if (val < last_val)
1204 char buf[INT_BUFSIZE_BOUND (uintmax_t)];
1205 error (EXIT_FAILURE, 0,
1206 _("line number %s is smaller than preceding line number, %s"),
1207 quote (argv[i]), umaxtostr (last_val, buf));
1210 if (val == last_val)
1211 error (0, 0,
1212 _("warning: line number %s is the same as preceding line number"),
1213 quote (argv[i]));
1215 last_val = val;
1217 p->lines_required = val;
1220 if (i + 1 < argc && *argv[i + 1] == '{')
1222 /* We have a repeat count. */
1223 i++;
1224 parse_repeat_count (i, p, argv[i]);
/* Names for the printf format flags ' and #.  These can be ORed together.  */
enum { FLAG_THOUSANDS = 1, FLAG_ALTERNATIVE = 2 };

/* Count the leading printf flag characters ('-', '0', '\'' and '#')
   of FORMAT and return that count.  *FLAGS_PTR receives the OR of
   FLAG_THOUSANDS (if '\'' was seen) and FLAG_ALTERNATIVE (if '#' was
   seen); '-' and '0' are counted but not recorded.  */
static size_t
get_format_flags (char const *format, int *flags_ptr)
{
  int seen = 0;
  size_t n = 0;

  for (;; n++)
    {
      char c = format[n];

      if (c == '\'')
        seen |= FLAG_THOUSANDS;
      else if (c == '#')
        seen |= FLAG_ALTERNATIVE;
      else if (c != '-' && c != '0')
        break;
    }

  *flags_ptr = seen;
  return n;
}
1264 /* Check that the printf format conversion specifier *FORMAT is valid
1265 and compatible with FLAGS. Change it to 'u' if it is 'd' or 'i',
1266 since the format will be used with an unsigned value. */
1267 static void
1268 check_format_conv_type (char *format, int flags)
1270 unsigned char ch = *format;
1271 int compatible_flags = FLAG_THOUSANDS;
1273 switch (ch)
1275 case 'd':
1276 case 'i':
1277 *format = 'u';
1278 break;
1280 case 'u':
1281 break;
1283 case 'o':
1284 case 'x':
1285 case 'X':
1286 compatible_flags = FLAG_ALTERNATIVE;
1287 break;
1289 case 0:
1290 error (EXIT_FAILURE, 0, _("missing conversion specifier in suffix"));
1291 break;
1293 default:
1294 if (isprint (ch))
1295 error (EXIT_FAILURE, 0,
1296 _("invalid conversion specifier in suffix: %c"), ch);
1297 else
1298 error (EXIT_FAILURE, 0,
1299 _("invalid conversion specifier in suffix: \\%.3o"), ch);
1302 if (flags & ~ compatible_flags)
1303 error (EXIT_FAILURE, 0,
1304 _("invalid flags in conversion specification: %%%c%c"),
1305 (flags & ~ compatible_flags & FLAG_ALTERNATIVE ? '#' : '\''), ch);
/* Return an upper bound on the number of bytes produced when FORMAT
   is applied to an unsigned int value.  FORMAT must contain exactly
   one conversion specification (not counting "%%"); otherwise, or if
   that specification is invalid, diagnose the problem and exit.  */
static size_t
max_out (char *format)
{
  bool seen_spec = false;
  char *p = format;

  while (*p)
    {
      /* Skip ordinary characters.  */
      if (*p++ != '%')
        continue;

      /* "%%" is an escaped percent sign, not a conversion.  */
      if (*p == '%')
        {
          p++;
          continue;
        }

      if (seen_spec)
        error (EXIT_FAILURE, 0,
               _("too many %% conversion specifications in suffix"));
      seen_spec = true;

      /* Step over flags, field width and precision to reach the
         conversion specifier itself.  */
      int spec_flags;
      p += get_format_flags (p, &spec_flags);
      while (ISDIGIT (*p))
        p++;
      if (*p == '.')
        do
          p++;
        while (ISDIGIT (*p));

      check_format_conv_type (p, spec_flags);
      p++;
    }

  if (! seen_spec)
    error (EXIT_FAILURE, 0,
           _("missing %% conversion specification in suffix"));

  /* Let snprintf measure the output for the largest unsigned int.  */
  int len = snprintf (NULL, 0, format, UINT_MAX);
  if (len < 0 || SIZE_MAX < len)
    xalloc_die ();
  return len;
}
1344 main (int argc, char **argv)
1346 int optc;
1348 initialize_main (&argc, &argv);
1349 set_program_name (argv[0]);
1350 setlocale (LC_ALL, "");
1351 bindtextdomain (PACKAGE, LOCALEDIR);
1352 textdomain (PACKAGE);
1354 atexit (close_stdout);
1356 global_argv = argv;
1357 controls = NULL;
1358 control_used = 0;
1359 suppress_count = false;
1360 remove_files = true;
1361 suppress_matched = false;
1362 prefix = DEFAULT_PREFIX;
1364 while ((optc = getopt_long (argc, argv, "f:b:kn:sqz", longopts, NULL)) != -1)
1365 switch (optc)
1367 case 'f':
1368 prefix = optarg;
1369 break;
1371 case 'b':
1372 suffix = optarg;
1373 break;
1375 case 'k':
1376 remove_files = false;
1377 break;
1379 case 'n':
1380 digits = xdectoimax (optarg, 0, MIN (INT_MAX, SIZE_MAX), "",
1381 _("invalid number"), 0);
1382 break;
1384 case 's':
1385 case 'q':
1386 suppress_count = true;
1387 break;
1389 case 'z':
1390 elide_empty_files = true;
1391 break;
1393 case SUPPRESS_MATCHED_OPTION:
1394 suppress_matched = true;
1395 break;
1397 case_GETOPT_HELP_CHAR;
1399 case_GETOPT_VERSION_CHAR (PROGRAM_NAME, AUTHORS);
1401 default:
1402 usage (EXIT_FAILURE);
1405 if (argc - optind < 2)
1407 if (argc <= optind)
1408 error (0, 0, _("missing operand"));
1409 else
1410 error (0, 0, _("missing operand after %s"), quote (argv[argc - 1]));
1411 usage (EXIT_FAILURE);
1414 size_t prefix_len = strlen (prefix);
1415 size_t max_digit_string_len
1416 = (suffix
1417 ? max_out (suffix)
1418 : MAX (INT_STRLEN_BOUND (unsigned int), digits));
1419 if (SIZE_MAX - 1 - prefix_len < max_digit_string_len)
1420 xalloc_die ();
1421 filename_space = xmalloc (prefix_len + max_digit_string_len + 1);
1423 set_input_file (argv[optind++]);
1425 parse_patterns (argc, optind, argv);
1428 int i;
1429 static int const sig[] =
1431 /* The usual suspects. */
1432 SIGALRM, SIGHUP, SIGINT, SIGPIPE, SIGQUIT, SIGTERM,
1433 #ifdef SIGPOLL
1434 SIGPOLL,
1435 #endif
1436 #ifdef SIGPROF
1437 SIGPROF,
1438 #endif
1439 #ifdef SIGVTALRM
1440 SIGVTALRM,
1441 #endif
1442 #ifdef SIGXCPU
1443 SIGXCPU,
1444 #endif
1445 #ifdef SIGXFSZ
1446 SIGXFSZ,
1447 #endif
1449 enum { nsigs = ARRAY_CARDINALITY (sig) };
1451 struct sigaction act;
1453 sigemptyset (&caught_signals);
1454 for (i = 0; i < nsigs; i++)
1456 sigaction (sig[i], NULL, &act);
1457 if (act.sa_handler != SIG_IGN)
1458 sigaddset (&caught_signals, sig[i]);
1461 act.sa_handler = interrupt_handler;
1462 act.sa_mask = caught_signals;
1463 act.sa_flags = 0;
1465 for (i = 0; i < nsigs; i++)
1466 if (sigismember (&caught_signals, sig[i]))
1467 sigaction (sig[i], &act, NULL);
1470 split_file ();
1472 if (close (STDIN_FILENO) != 0)
1474 error (0, errno, _("read error"));
1475 cleanup_fatal ();
1478 return EXIT_SUCCESS;
/* Print usage information and terminate the program with exit status
   STATUS.  When STATUS is not EXIT_SUCCESS only emit_try_help is
   called; otherwise the full help text (synopsis, option list and
   PATTERN syntax) is written to stdout.  Every path ends in
   exit (STATUS), so this function does not return.  */
1481 void
1482 usage (int status)
/* A failing status gets only the brief hint; the long help below is
   reserved for EXIT_SUCCESS.  */
1484 if (status != EXIT_SUCCESS)
1485 emit_try_help ();
1486 else
1488 printf (_("\
1489 Usage: %s [OPTION]... FILE PATTERN...\n\
1491 program_name);
1492 fputs (_("\
1493 Output pieces of FILE separated by PATTERN(s) to files 'xx00', 'xx01', ...,\n\
1494 and output byte counts of each piece to standard output.\n\
1495 "), stdout);
1496 fputs (_("\
1498 Read standard input if FILE is -\n\
1499 "), stdout);
1501 emit_mandatory_arg_note ();
1503 fputs (_("\
1504 -b, --suffix-format=FORMAT use sprintf FORMAT instead of %02d\n\
1505 -f, --prefix=PREFIX use PREFIX instead of 'xx'\n\
1506 -k, --keep-files do not remove output files on errors\n\
1507 "), stdout);
1508 fputs (_("\
1509 --suppress-matched suppress the lines matching PATTERN\n\
1510 "), stdout);
1511 fputs (_("\
1512 -n, --digits=DIGITS use specified number of digits instead of 2\n\
1513 -s, --quiet, --silent do not print counts of output file sizes\n\
1514 -z, --elide-empty-files remove empty output files\n\
1515 "), stdout);
1516 fputs (HELP_OPTION_DESCRIPTION, stdout);
1517 fputs (VERSION_OPTION_DESCRIPTION, stdout);
1518 fputs (_("\
1520 Each PATTERN may be:\n\
1521 INTEGER copy up to but not including specified line number\n\
1522 /REGEXP/[OFFSET] copy up to but not including a matching line\n\
1523 %REGEXP%[OFFSET] skip to, but not including a matching line\n\
1524 {INTEGER} repeat the previous pattern specified number of times\n\
1525 {*} repeat the previous pattern as many times as possible\n\
1527 A line OFFSET is a required '+' or '-' followed by a positive integer.\n\
1528 "), stdout);
1529 emit_ancillary_info (PROGRAM_NAME);
1531 exit (status);