doc: sort: be more descriptive than 'manual'
[coreutils.git] / src / csplit.c
blob51bb38549ade0f2ddf55c1236686807a18e0b722
1 /* csplit - split a file into sections determined by context lines
2 Copyright (C) 1991-2024 Free Software Foundation, Inc.
4 This program is free software: you can redistribute it and/or modify
5 it under the terms of the GNU General Public License as published by
6 the Free Software Foundation, either version 3 of the License, or
7 (at your option) any later version.
9 This program is distributed in the hope that it will be useful,
10 but WITHOUT ANY WARRANTY; without even the implied warranty of
11 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 GNU General Public License for more details.
14 You should have received a copy of the GNU General Public License
15 along with this program. If not, see <https://www.gnu.org/licenses/>. */
17 /* Written by Stuart Kemp, cpsrk@groper.jcu.edu.au.
18 Modified by David MacKenzie, djm@gnu.ai.mit.edu. */
20 #include <config.h>
22 #include <ctype.h>
23 #include <getopt.h>
24 #include <sys/types.h>
25 #include <signal.h>
27 #include "system.h"
29 #include <regex.h>
31 #include "fd-reopen.h"
32 #include "quote.h"
33 #include "safe-read.h"
34 #include "stdio--.h"
35 #include "xdectoint.h"
36 #include "xstrtol.h"
/* Canonical name of this program, without any prefix such as 'g'.  */
#define PROGRAM_NAME "csplit"

/* Author list handed to the --version handler.  */
#define AUTHORS \
  proper_name ("Stuart Kemp"), \
  proper_name ("David MacKenzie")

/* Output file name prefix used unless -f/--prefix overrides it.  */
#define DEFAULT_PREFIX "xx"
/* One pattern argument from the command line, in compiled form.  */
struct control
{
  intmax_t offset;              /* Line offset applied to a regexp match.  */
  intmax_t lines_required;      /* Line number given by a numeric pattern.  */
  intmax_t repeat;              /* Repeat count for this pattern.  */
  int argnum;                   /* Index of the pattern in ARGV.  */
  bool repeat_forever;          /* Set when the repeat count is '*'.  */
  bool ignore;                  /* Set to produce no output (regexp only).  */
  bool regexpr;                 /* Set when the pattern is a regexp.  */
  struct re_pattern_buffer re_compiled; /* The compiled regexp.  */
};
/* Buffer sizing parameters.
   START_SIZE is the initial size of the data area in each buffer.
   CTRL_SIZE is the number of lines kept in each node of the line list.
   When DEBUG is defined, small values are used to test the algorithms.
   The two sets of definitions are mutually exclusive so that neither
   macro is ever redefined with a different replacement list.  */
#ifdef DEBUG
# define START_SIZE 200
# define CTRL_SIZE 1
#else
# define START_SIZE 8191
# define CTRL_SIZE 80
#endif
73 /* A string with a length count. */
74 struct cstring
76 idx_t len;
77 char *str;
80 /* Pointers to the beginnings of lines in the buffer area.
81 These structures are linked together if needed. */
82 struct line
84 idx_t used; /* Number of offsets used in this struct. */
85 idx_t insert_index; /* Next offset to use when inserting line. */
86 idx_t retrieve_index; /* Next index to use when retrieving line. */
87 struct cstring starts[CTRL_SIZE]; /* Lines in the data area. */
88 struct line *next; /* Next in linked list. */
91 /* The structure to hold the input lines.
92 Contains a pointer to the data area and a list containing
93 pointers to the individual lines. */
94 struct buffer_record
96 idx_t bytes_alloc; /* Size of the buffer area. */
97 idx_t bytes_used; /* Bytes used in the buffer area. */
98 intmax_t start_line; /* First line number in this buffer. */
99 intmax_t first_available; /* First line that can be retrieved. */
100 idx_t num_lines; /* Number of complete lines in this buffer. */
101 char *buffer; /* Data area. */
102 struct line *line_start; /* Head of list of pointers to lines. */
103 struct line *curr_line; /* The line start record currently in use. */
104 struct buffer_record *next;
107 static void close_output_file (void);
108 static void create_output_file (void);
109 static void delete_all_files (bool);
110 static void save_line_to_file (const struct cstring *line);
112 /* Start of buffer list. */
113 static struct buffer_record *head = nullptr;
115 /* Partially read line. */
116 static char *hold_area = nullptr;
118 /* Number of bytes in 'hold_area'. */
119 static idx_t hold_count = 0;
121 /* Number of the last line in the buffers. */
122 static intmax_t last_line_number = 0;
124 /* Number of the line currently being examined. */
125 static intmax_t current_line = 0;
127 /* If true, we have read EOF. */
128 static bool have_read_eof = false;
130 /* Name of output files. */
131 static char *volatile filename_space = nullptr;
133 /* Prefix part of output file names. */
134 static char const *volatile prefix = nullptr;
136 /* Suffix part of output file names. */
137 static char *volatile suffix = nullptr;
139 /* Number of digits to use in output file names. */
140 static int volatile digits = 2;
142 /* Number of files created so far. */
143 static int volatile files_created = 0;
145 /* Number of bytes written to current file. */
146 static intmax_t bytes_written;
148 /* Output file pointer. */
149 static FILE *output_stream = nullptr;
151 /* Output file name. */
152 static char *output_filename = nullptr;
154 /* Perhaps it would be cleaner to pass arg values instead of indexes. */
155 static char **global_argv;
157 /* If true, do not print the count of bytes in each output file. */
158 static bool suppress_count;
160 /* If true, remove output files on error. */
161 static bool volatile remove_files;
163 /* If true, remove all output files which have a zero length. */
164 static bool elide_empty_files;
166 /* If true, suppress the lines that match the PATTERN */
167 static bool suppress_matched;
169 /* The compiled pattern arguments, which determine how to split
170 the input file. */
171 static struct control *controls;
173 /* Number of elements in 'controls'. */
174 static idx_t control_used;
176 /* The set of signals that are caught. */
177 static sigset_t caught_signals;
/* Pseudo short-option values for long options that have no short
   equivalent.  They are non-characters, beginning at CHAR_MAX + 1.  */
enum
{
  SUPPRESS_MATCHED_OPTION = CHAR_MAX + 1
};
186 static struct option const longopts[] =
188 {"digits", required_argument, nullptr, 'n'},
189 {"quiet", no_argument, nullptr, 'q'},
190 {"silent", no_argument, nullptr, 's'},
191 {"keep-files", no_argument, nullptr, 'k'},
192 {"elide-empty-files", no_argument, nullptr, 'z'},
193 {"prefix", required_argument, nullptr, 'f'},
194 {"suffix-format", required_argument, nullptr, 'b'},
195 {"suppress-matched", no_argument, nullptr, SUPPRESS_MATCHED_OPTION},
196 {GETOPT_HELP_OPTION_DECL},
197 {GETOPT_VERSION_OPTION_DECL},
198 {nullptr, 0, nullptr, 0}
201 /* Optionally remove files created so far; then exit.
202 Called when an error detected. */
204 static void
205 cleanup (void)
207 sigset_t oldset;
209 close_output_file ();
211 sigprocmask (SIG_BLOCK, &caught_signals, &oldset);
212 delete_all_files (false);
213 sigprocmask (SIG_SETMASK, &oldset, nullptr);
/* Run cleanup, then terminate the program with a failure status.  */

static _Noreturn void
cleanup_fatal (void)
{
  cleanup ();
  exit (EXIT_FAILURE);
}
/* Report memory exhaustion, then clean up and exit with failure.  */

extern void
xalloc_die (void)
{
  error (0, 0, "%s", _("memory exhausted"));
  cleanup_fatal ();
}
230 static void
231 interrupt_handler (int sig)
233 delete_all_files (true);
234 signal (sig, SIG_DFL);
235 /* The signal has been reset to SIG_DFL, but blocked during this
236 handler. Force the default action of this signal once the
237 handler returns and the block is removed. */
238 raise (sig);
241 /* Keep track of NUM bytes of a partial line in buffer START.
242 These bytes will be retrieved later when another large buffer is read. */
244 static void
245 save_to_hold_area (char *start, idx_t num)
247 free (hold_area);
248 hold_area = start;
249 hold_count = num;
252 /* Read up to MAX_N_BYTES bytes from the input stream into DEST.
253 Return the number of bytes read. */
255 static idx_t
256 read_input (char *dest, idx_t max_n_bytes)
258 if (max_n_bytes == 0)
259 return 0;
261 ptrdiff_t bytes_read = safe_read (STDIN_FILENO, dest, max_n_bytes);
263 if (bytes_read == 0)
264 have_read_eof = true;
266 if (bytes_read < 0)
268 error (0, errno, _("read error"));
269 cleanup_fatal ();
272 return bytes_read;
275 /* Initialize existing line record P. */
277 static void
278 clear_line_control (struct line *p)
280 p->used = 0;
281 p->insert_index = 0;
282 p->retrieve_index = 0;
285 /* Return a new, initialized line record. */
287 static struct line *
288 new_line_control (void)
290 struct line *p = xmalloc (sizeof *p);
292 p->next = nullptr;
293 clear_line_control (p);
295 return p;
298 /* Record LINE_START, which is the address of the start of a line
299 of length LINE_LEN in the large buffer, in the lines buffer of B. */
301 static void
302 keep_new_line (struct buffer_record *b, char *line_start, idx_t line_len)
304 struct line *l;
306 /* If there is no existing area to keep line info, get some. */
307 if (b->line_start == nullptr)
308 b->line_start = b->curr_line = new_line_control ();
310 /* If existing area for lines is full, get more. */
311 if (b->curr_line->used == CTRL_SIZE)
313 b->curr_line->next = new_line_control ();
314 b->curr_line = b->curr_line->next;
317 l = b->curr_line;
319 /* Record the start of the line, and update counters. */
320 l->starts[l->insert_index].str = line_start;
321 l->starts[l->insert_index].len = line_len;
322 l->used++;
323 l->insert_index++;
326 /* Scan the buffer in B for newline characters
327 and record the line start locations and lengths in B.
328 Return the number of lines found in this buffer.
330 There may be an incomplete line at the end of the buffer;
331 a pointer is kept to this area, which will be used when
332 the next buffer is filled. */
334 static idx_t
335 record_line_starts (struct buffer_record *b)
337 char *line_start; /* Start of current line. */
338 idx_t lines; /* Number of lines found. */
339 idx_t line_length; /* Length of each line found. */
341 if (b->bytes_used == 0)
342 return 0;
344 lines = 0;
345 line_start = b->buffer;
346 char *buffer_end = line_start + b->bytes_used;
347 *buffer_end = '\n';
349 while (true)
351 char *line_end = rawmemchr (line_start, '\n');
352 if (line_end == buffer_end)
353 break;
354 line_length = line_end - line_start + 1;
355 keep_new_line (b, line_start, line_length);
356 line_start = line_end + 1;
357 lines++;
360 /* Check for an incomplete last line. */
361 idx_t bytes_left = buffer_end - line_start;
362 if (bytes_left)
364 if (have_read_eof)
366 keep_new_line (b, line_start, bytes_left);
367 lines++;
369 else
370 save_to_hold_area (ximemdup (line_start, bytes_left), bytes_left);
373 b->num_lines = lines;
374 b->first_available = b->start_line = last_line_number + 1;
375 last_line_number += lines;
377 return lines;
380 /* Work around <https://gcc.gnu.org/bugzilla/show_bug.cgi?id=109614>. */
381 #if 13 <= __GNUC__
382 # pragma GCC diagnostic ignored "-Wanalyzer-mismatching-deallocation"
383 # pragma GCC diagnostic ignored "-Wanalyzer-use-after-free"
384 # pragma GCC diagnostic ignored "-Wanalyzer-use-of-uninitialized-value"
385 #endif
387 static void
388 free_buffer (struct buffer_record *buf)
390 for (struct line *l = buf->line_start; l;)
392 struct line *n = l->next;
393 free (l);
394 l = n;
396 free (buf->buffer);
397 free (buf);
400 /* Return a new buffer of at least MINSIZE bytes. */
402 static ATTRIBUTE_DEALLOC (free_buffer, 1)
403 struct buffer_record *
404 get_new_buffer (idx_t min_size)
406 struct buffer_record *new_buffer = xmalloc (sizeof *new_buffer);
407 new_buffer->bytes_alloc = 0;
408 new_buffer->buffer = xpalloc (nullptr, &new_buffer->bytes_alloc, min_size,
409 -1, 1);
410 new_buffer->bytes_used = 0;
411 new_buffer->start_line = new_buffer->first_available = last_line_number + 1;
412 new_buffer->num_lines = 0;
413 new_buffer->line_start = new_buffer->curr_line = nullptr;
414 new_buffer->next = nullptr;
416 return new_buffer;
419 /* Append buffer BUF to the linked list of buffers that contain
420 some data yet to be processed. */
422 static void
423 save_buffer (struct buffer_record *buf)
425 struct buffer_record *p;
427 buf->next = nullptr;
428 buf->curr_line = buf->line_start;
430 if (head == nullptr)
431 head = buf;
432 else
434 for (p = head; p->next; p = p->next)
435 /* Do nothing. */ ;
436 p->next = buf;
440 /* Fill a buffer of input.
442 Set the initial size of the buffer to a default.
443 Fill the buffer (from the hold area and input stream)
444 and find the individual lines.
445 If no lines are found (the buffer is too small to hold the next line),
446 release the current buffer (whose contents would have been put in the
447 hold area) and repeat the process with another large buffer until at least
448 one entire line has been read.
450 Return true if a new buffer was obtained, otherwise false
451 (in which case end-of-file must have been encountered). */
453 static bool
454 load_buffer (void)
456 if (have_read_eof)
457 return false;
459 /* We must make the buffer at least as large as the amount of data
460 in the partial line left over from the last call,
461 plus room for a sentinel '\n'. */
462 idx_t bytes_wanted = MAX (START_SIZE, hold_count + 1);
464 while (true)
466 struct buffer_record *b = get_new_buffer (bytes_wanted);
467 idx_t bytes_alloc = b->bytes_alloc;
468 idx_t bytes_avail = bytes_alloc;
469 char *p = b->buffer;
471 /* First check the 'holding' area for a partial line. */
472 if (hold_count)
474 p = mempcpy (p, hold_area, hold_count);
475 b->bytes_used += hold_count;
476 bytes_avail -= hold_count;
477 hold_count = 0;
480 b->bytes_used += read_input (p, bytes_avail - 1);
482 if (record_line_starts (b) != 0)
484 save_buffer (b);
485 return true;
488 free_buffer (b);
489 if (have_read_eof)
490 return false;
491 if (ckd_add (&bytes_wanted, bytes_alloc, bytes_alloc >> 1))
492 xalloc_die ();
496 /* Return the line number of the first line that has not yet been retrieved. */
498 static intmax_t
499 get_first_line_in_buffer (void)
501 if (head == nullptr && !load_buffer ())
502 error (EXIT_FAILURE, errno, _("input disappeared"));
504 return head->first_available;
507 /* Return a pointer to the logical first line in the buffer and make the
508 next line the logical first line.
509 Return nullptr if there is no more input. */
511 static struct cstring *
512 remove_line (void)
514 /* If non-null, this is the buffer for which the previous call
515 returned the final line. So now, presuming that line has been
516 processed, we can free the buffer and reset this pointer. */
517 static struct buffer_record *prev_buf = nullptr;
519 struct cstring *line; /* Return value. */
520 struct line *l; /* For convenience. */
522 if (prev_buf)
524 free_buffer (prev_buf);
525 prev_buf = nullptr;
528 if (head == nullptr && !load_buffer ())
529 return nullptr;
531 if (current_line < head->first_available)
532 current_line = head->first_available;
534 ++(head->first_available);
536 l = head->curr_line;
538 line = &l->starts[l->retrieve_index];
540 /* Advance index to next line. */
541 if (++l->retrieve_index == l->used)
543 /* Go on to the next line record. */
544 head->curr_line = l->next;
545 if (head->curr_line == nullptr || head->curr_line->used == 0)
547 /* Go on to the next data block.
548 but first record the current one so we can free it
549 once the line we're returning has been processed. */
550 prev_buf = head;
551 head = head->next;
555 return line;
558 /* Search the buffers for line LINENUM, reading more input if necessary.
559 Return a pointer to the line, or nullptr if it is not found in the file. */
561 static struct cstring *
562 find_line (intmax_t linenum)
564 struct buffer_record *b;
566 if (head == nullptr && !load_buffer ())
567 return nullptr;
569 if (linenum < head->start_line)
570 return nullptr;
572 for (b = head;;)
574 if (linenum < b->start_line + b->num_lines)
576 /* The line is in this buffer. */
577 struct line *l;
578 idx_t offset; /* How far into the buffer the line is. */
580 l = b->line_start;
581 offset = linenum - b->start_line;
582 /* Find the control record. */
583 while (offset >= CTRL_SIZE)
585 l = l->next;
586 offset -= CTRL_SIZE;
588 return &l->starts[offset];
590 if (b->next == nullptr && !load_buffer ())
591 return nullptr;
592 b = b->next; /* Try the next data block. */
596 /* Return true if at least one more line is available for input. */
598 static bool
599 no_more_lines (void)
601 return find_line (current_line + 1) == nullptr;
/* Make file NAME the standard input.  The name "-" leaves standard
   input as it is.  Exit with a diagnostic if NAME cannot be opened.  */

static void
set_input_file (char const *name)
{
  if (STREQ (name, "-"))
    return;

  if (fd_reopen (STDIN_FILENO, name, O_RDONLY, 0) < 0)
    error (EXIT_FAILURE, errno, _("cannot open %s for reading"),
           quoteaf (name));
}
614 /* Write all lines from the beginning of the buffer up to, but
615 not including, line LAST_LINE, to the current output file.
616 If IGNORE is true, do not output lines selected here.
617 ARGNUM is the index in ARGV of the current pattern. */
619 static void
620 write_to_file (intmax_t last_line, bool ignore, int argnum)
622 struct cstring *line;
623 intmax_t first_line; /* First available input line. */
624 intmax_t lines; /* Number of lines to output. */
625 intmax_t i;
627 first_line = get_first_line_in_buffer ();
629 if (first_line > last_line)
631 error (0, 0, _("%s: line number out of range"),
632 quote (global_argv[argnum]));
633 cleanup_fatal ();
636 lines = last_line - first_line;
638 for (i = 0; i < lines; i++)
640 line = remove_line ();
641 if (line == nullptr)
643 error (0, 0, _("%s: line number out of range"),
644 quote (global_argv[argnum]));
645 cleanup_fatal ();
647 if (!ignore)
648 save_line_to_file (line);
652 /* Output any lines left after all regexps have been processed. */
654 static void
655 dump_rest_of_file (void)
657 struct cstring *line;
659 while ((line = remove_line ()) != nullptr)
660 save_line_to_file (line);
663 /* Handle an attempt to read beyond EOF under the control of record P,
664 on iteration REPETITION if nonzero. */
666 static void
667 handle_line_error (const struct control *p, intmax_t repetition)
669 char buf[INT_BUFSIZE_BOUND (intmax_t)];
671 fprintf (stderr, _("%s: %s: line number out of range"),
672 program_name, quote (imaxtostr (p->lines_required, buf)));
673 if (repetition)
674 fprintf (stderr, _(" on repetition %jd\n"), repetition);
675 else
676 fprintf (stderr, "\n");
678 cleanup_fatal ();
681 /* Determine the line number that marks the end of this file,
682 then get those lines and save them to the output file.
683 P is the control record.
684 REPETITION is the repetition number. */
686 static void
687 process_line_count (const struct control *p, intmax_t repetition)
689 intmax_t linenum;
690 intmax_t last_line_to_save = p->lines_required * (repetition + 1);
692 create_output_file ();
694 /* Ensure that the line number specified is not 1 greater than
695 the number of lines in the file.
696 When suppressing matched lines, check before the loop. */
697 if (no_more_lines () && suppress_matched)
698 handle_line_error (p, repetition);
700 linenum = get_first_line_in_buffer ();
701 while (linenum++ < last_line_to_save)
703 struct cstring *line = remove_line ();
704 if (line == nullptr)
705 handle_line_error (p, repetition);
706 save_line_to_file (line);
709 close_output_file ();
711 if (suppress_matched)
712 remove_line ();
714 /* Ensure that the line number specified is not 1 greater than
715 the number of lines in the file. */
716 if (no_more_lines () && !suppress_matched)
717 handle_line_error (p, repetition);
720 static void
721 regexp_error (struct control *p, intmax_t repetition, bool ignore)
723 fprintf (stderr, _("%s: %s: match not found"),
724 program_name, quote (global_argv[p->argnum]));
726 if (repetition)
727 fprintf (stderr, _(" on repetition %jd\n"), repetition);
728 else
729 fprintf (stderr, "\n");
731 if (!ignore)
733 dump_rest_of_file ();
734 close_output_file ();
736 cleanup_fatal ();
739 /* Read the input until a line matches the regexp in P, outputting
740 it unless P->IGNORE is true.
741 REPETITION is this repeat-count; 0 means the first time. */
743 static void
744 process_regexp (struct control *p, intmax_t repetition)
746 struct cstring *line; /* From input file. */
747 idx_t line_len; /* To make "$" in regexps work. */
748 intmax_t break_line; /* First line number of next file. */
749 bool ignore = p->ignore; /* If true, skip this section. */
750 regoff_t ret;
752 if (!ignore)
753 create_output_file ();
755 /* If there is no offset for the regular expression, or
756 it is positive, then it is not necessary to buffer the lines. */
758 if (p->offset >= 0)
760 while (true)
762 line = find_line (++current_line);
763 if (line == nullptr)
765 if (p->repeat_forever)
767 if (!ignore)
769 dump_rest_of_file ();
770 close_output_file ();
772 exit (EXIT_SUCCESS);
774 else
775 regexp_error (p, repetition, ignore);
777 line_len = line->len;
778 if (line->str[line_len - 1] == '\n')
779 line_len--;
780 ret = re_search (&p->re_compiled, line->str, line_len,
781 0, line_len, nullptr);
782 if (ret == -2)
784 error (0, 0, _("error in regular expression search"));
785 cleanup_fatal ();
787 if (ret == -1)
789 line = remove_line ();
790 if (!ignore)
791 save_line_to_file (line);
793 else
794 break;
797 else
799 /* Buffer the lines. */
800 while (true)
802 line = find_line (++current_line);
803 if (line == nullptr)
805 if (p->repeat_forever)
807 if (!ignore)
809 dump_rest_of_file ();
810 close_output_file ();
812 exit (EXIT_SUCCESS);
814 else
815 regexp_error (p, repetition, ignore);
817 line_len = line->len;
818 if (line->str[line_len - 1] == '\n')
819 line_len--;
820 ret = re_search (&p->re_compiled, line->str, line_len,
821 0, line_len, nullptr);
822 if (ret == -2)
824 error (0, 0, _("error in regular expression search"));
825 cleanup_fatal ();
827 if (ret != -1)
828 break;
832 /* Account for any offset from this regexp. */
833 break_line = current_line + p->offset;
835 write_to_file (break_line, ignore, p->argnum);
837 if (!ignore)
838 close_output_file ();
840 if (p->offset > 0)
841 current_line = break_line;
843 if (suppress_matched)
844 remove_line ();
847 /* Split the input file according to the control records we have built. */
849 static void
850 split_file (void)
852 for (idx_t i = 0; i < control_used; i++)
854 intmax_t j;
855 if (controls[i].regexpr)
857 for (j = 0; (controls[i].repeat_forever
858 || j <= controls[i].repeat); j++)
859 process_regexp (&controls[i], j);
861 else
863 for (j = 0; (controls[i].repeat_forever
864 || j <= controls[i].repeat); j++)
865 process_line_count (&controls[i], j);
869 create_output_file ();
870 dump_rest_of_file ();
871 close_output_file ();
874 /* Return the name of output file number NUM.
876 This function is called from a signal handler, so it should invoke
877 only reentrant functions that are async-signal-safe. POSIX does
878 not guarantee this for the functions called below, but we don't
879 know of any hosts where this implementation isn't safe. */
881 static char *
882 make_filename (int num)
884 strcpy (filename_space, prefix);
885 if (suffix)
886 sprintf (filename_space + strlen (prefix), suffix, num);
887 else
888 sprintf (filename_space + strlen (prefix), "%0*d", digits, num);
889 return filename_space;
892 /* Create the next output file. */
894 static void
895 create_output_file (void)
897 int nfiles = files_created;
898 bool fopen_ok;
899 int fopen_errno;
901 output_filename = make_filename (nfiles);
903 if (nfiles == INT_MAX)
905 fopen_ok = false;
906 fopen_errno = EOVERFLOW;
908 else
910 /* Create the output file in a critical section, to avoid races. */
911 sigset_t oldset;
912 sigprocmask (SIG_BLOCK, &caught_signals, &oldset);
913 output_stream = fopen (output_filename, "w");
914 fopen_ok = (output_stream != nullptr);
915 fopen_errno = errno;
916 files_created = nfiles + fopen_ok;
917 sigprocmask (SIG_SETMASK, &oldset, nullptr);
920 if (! fopen_ok)
922 error (0, fopen_errno, "%s", quotef (output_filename));
923 cleanup_fatal ();
925 bytes_written = 0;
928 /* If requested, delete all the files we have created. This function
929 must be called only from critical sections. */
931 static void
932 delete_all_files (bool in_signal_handler)
934 if (! remove_files)
935 return;
937 for (int i = files_created; 0 <= --i; )
939 char const *name = make_filename (i);
940 if (unlink (name) != 0 && errno != ENOENT && !in_signal_handler)
941 error (0, errno, "%s", quotef (name));
944 files_created = 0;
947 /* Close the current output file and print the count
948 of characters in this file. */
950 static void
951 close_output_file (void)
953 if (output_stream)
955 if (ferror (output_stream))
957 error (0, 0, _("write error for %s"), quoteaf (output_filename));
958 output_stream = nullptr;
959 cleanup_fatal ();
961 if (fclose (output_stream) != 0)
963 error (0, errno, "%s", quotef (output_filename));
964 output_stream = nullptr;
965 cleanup_fatal ();
967 if (bytes_written == 0 && elide_empty_files)
969 sigset_t oldset;
970 bool unlink_ok;
971 int unlink_errno;
973 /* Remove the output file in a critical section, to avoid races. */
974 sigprocmask (SIG_BLOCK, &caught_signals, &oldset);
975 unlink_ok = (unlink (output_filename) == 0);
976 unlink_errno = errno;
977 files_created--;
978 sigprocmask (SIG_SETMASK, &oldset, nullptr);
980 if (! unlink_ok && unlink_errno != ENOENT)
981 error (0, unlink_errno, "%s", quotef (output_filename));
983 else
985 if (!suppress_count)
986 fprintf (stdout, "%jd\n", bytes_written);
988 output_stream = nullptr;
992 /* Save line LINE to the output file and
993 increment the character count for the current file. */
995 static void
996 save_line_to_file (const struct cstring *line)
998 idx_t l = fwrite (line->str, sizeof (char), line->len, output_stream);
999 if (l != line->len)
1001 error (0, errno, _("write error for %s"), quoteaf (output_filename));
1002 output_stream = nullptr;
1003 cleanup_fatal ();
1005 bytes_written += line->len;
1008 /* Return a new, initialized control record. */
1010 static struct control *
1011 new_control_record (void)
1013 static idx_t control_allocated = 0; /* Total space allocated. */
1014 struct control *p;
1016 if (control_used == control_allocated)
1017 controls = xpalloc (controls, &control_allocated, 1, -1, sizeof *controls);
1018 p = &controls[control_used++];
1019 p->regexpr = false;
1020 p->repeat = 0;
1021 p->repeat_forever = false;
1022 p->lines_required = 0;
1023 p->offset = 0;
1024 return p;
1027 /* Check if there is a numeric offset after a regular expression.
1028 STR is the entire command line argument.
1029 P is the control record for this regular expression.
1030 NUM is the numeric part of STR. */
1032 static void
1033 check_for_offset (struct control *p, char const *str, char const *num)
1035 if (xstrtoimax (num, nullptr, 10, &p->offset, "") != LONGINT_OK)
1036 error (EXIT_FAILURE, 0, _("%s: integer expected after delimiter"),
1037 quote (str));
1040 /* Given that the first character of command line arg STR is '{',
1041 make sure that the rest of the string is a valid repeat count
1042 and store its value in P.
1043 ARGNUM is the ARGV index of STR. */
1045 static void
1046 parse_repeat_count (int argnum, struct control *p, char *str)
1048 char *end;
1050 end = str + strlen (str) - 1;
1051 if (*end != '}')
1052 error (EXIT_FAILURE, 0, _("%s: '}' is required in repeat count"),
1053 quote (str));
1054 *end = '\0';
1056 if (str + 1 == end - 1 && *(str + 1) == '*')
1057 p->repeat_forever = true;
1058 else
1060 uintmax_t val;
1061 if (xstrtoumax (str + 1, nullptr, 10, &val, "") != LONGINT_OK
1062 || INTMAX_MAX < val)
1064 error (EXIT_FAILURE, 0,
1065 _("%s}: integer required between '{' and '}'"),
1066 quote (global_argv[argnum]));
1068 p->repeat = val;
1071 *end = '}';
1074 /* Extract the regular expression from STR and check for a numeric offset.
1075 STR should start with the regexp delimiter character.
1076 Return a new control record for the regular expression.
1077 ARGNUM is the ARGV index of STR.
1078 Unless IGNORE is true, mark these lines for output. */
1080 static struct control *
1081 extract_regexp (int argnum, bool ignore, char const *str)
1083 idx_t len; /* Number of bytes in this regexp. */
1084 char delim = *str;
1085 char const *closing_delim;
1086 struct control *p;
1087 char const *err;
1089 closing_delim = strrchr (str + 1, delim);
1090 if (closing_delim == nullptr)
1091 error (EXIT_FAILURE, 0,
1092 _("%s: closing delimiter '%c' missing"), str, delim);
1094 len = closing_delim - str - 1;
1095 p = new_control_record ();
1096 p->argnum = argnum;
1097 p->ignore = ignore;
1099 p->regexpr = true;
1100 p->re_compiled.buffer = nullptr;
1101 p->re_compiled.allocated = 0;
1102 p->re_compiled.fastmap = xmalloc (UCHAR_MAX + 1);
1103 p->re_compiled.translate = nullptr;
1104 re_syntax_options =
1105 RE_SYNTAX_POSIX_BASIC & ~RE_CONTEXT_INVALID_DUP & ~RE_NO_EMPTY_RANGES;
1106 err = re_compile_pattern (str + 1, len, &p->re_compiled);
1107 if (err)
1109 error (0, 0, _("%s: invalid regular expression: %s"), quote (str), err);
1110 cleanup_fatal ();
1113 if (closing_delim[1])
1114 check_for_offset (p, str, closing_delim + 1);
1116 return p;
1119 /* Extract the break patterns from args START through ARGC - 1 of ARGV.
1120 After each pattern, check if the next argument is a repeat count. */
1122 static void
1123 parse_patterns (int argc, int start, char **argv)
1125 struct control *p; /* New control record created. */
1126 static intmax_t last_val = 0;
1128 for (int i = start; i < argc; i++)
1130 if (*argv[i] == '/' || *argv[i] == '%')
1132 p = extract_regexp (i, *argv[i] == '%', argv[i]);
1134 else
1136 p = new_control_record ();
1137 p->argnum = i;
1139 uintmax_t val;
1140 if (xstrtoumax (argv[i], nullptr, 10, &val, "") != LONGINT_OK
1141 || INTMAX_MAX < val)
1142 error (EXIT_FAILURE, 0, _("%s: invalid pattern"), quote (argv[i]));
1143 if (val == 0)
1144 error (EXIT_FAILURE, 0,
1145 _("%s: line number must be greater than zero"), argv[i]);
1146 if (val < last_val)
1147 error (EXIT_FAILURE, 0,
1148 _("line number %s is smaller than preceding line number,"
1149 " %jd"), quote (argv[i]), last_val);
1151 if (val == last_val)
1152 error (0, 0,
1153 _("warning: line number %s is the same as preceding line number"),
1154 quote (argv[i]));
1156 last_val = val;
1158 p->lines_required = val;
1161 if (i + 1 < argc && *argv[i + 1] == '{')
1163 /* We have a repeat count. */
1164 i++;
1165 parse_repeat_count (i, p, argv[i]);
/* Bit names for the printf format flags ' and #.
   These can be ORed together.  */
enum
{
  FLAG_THOUSANDS = 1,
  FLAG_ALTERNATIVE = 2
};
1175 /* Scan the printf format flags in FORMAT, storing info about the
1176 flags into *FLAGS_PTR. Return the number of flags found. */
1177 static idx_t
1178 get_format_flags (char const *format, int *flags_ptr)
1180 int flags = 0;
1182 for (idx_t count = 0; ; count++)
1184 switch (format[count])
1186 case '-':
1187 case '0':
1188 break;
1190 case '\'':
1191 flags |= FLAG_THOUSANDS;
1192 break;
1194 case '#':
1195 flags |= FLAG_ALTERNATIVE;
1196 break;
1198 default:
1199 *flags_ptr = flags;
1200 return count;
1205 /* Check that the printf format conversion specifier *FORMAT is valid
1206 and compatible with FLAGS. Change it to 'd' if it is 'u',
1207 since the format will be used with a signed value. */
1208 static void
1209 check_format_conv_type (char *format, int flags)
1211 unsigned char ch = *format;
1212 int compatible_flags = FLAG_THOUSANDS;
1214 switch (ch)
1216 case 'd':
1217 case 'i':
1218 break;
1220 case 'u':
1221 *format = 'd';
1222 break;
1224 case 'o':
1225 case 'x':
1226 case 'X':
1227 compatible_flags = FLAG_ALTERNATIVE;
1228 break;
1230 case 0:
1231 error (EXIT_FAILURE, 0, _("missing conversion specifier in suffix"));
1233 default:
1234 if (isprint (ch))
1235 error (EXIT_FAILURE, 0,
1236 _("invalid conversion specifier in suffix: %c"), ch);
1237 else
1238 error (EXIT_FAILURE, 0,
1239 _("invalid conversion specifier in suffix: \\%.3o"), ch);
1242 if (flags & ~ compatible_flags)
1243 error (EXIT_FAILURE, 0,
1244 _("invalid flags in conversion specification: %%%c%c"),
1245 (flags & ~ compatible_flags & FLAG_ALTERNATIVE ? '#' : '\''), ch);
1248 /* Return the maximum number of bytes that can be generated by
1249 applying FORMAT to an int value. If the format is
1250 invalid, diagnose the problem and exit. */
1251 static idx_t
1252 max_out (char *format)
1254 bool percent = false;
1256 for (char *f = format; *f; f++)
1257 if (*f == '%' && *++f != '%')
1259 if (percent)
1260 error (EXIT_FAILURE, 0,
1261 _("too many %% conversion specifications in suffix"));
1262 percent = true;
1263 int flags;
1264 f += get_format_flags (f, &flags);
1265 while (ISDIGIT (*f))
1266 f++;
1267 if (*f == '.')
1268 while (ISDIGIT (*++f))
1269 continue;
1270 check_format_conv_type (f, flags);
1273 if (! percent)
1274 error (EXIT_FAILURE, 0,
1275 _("missing %% conversion specification in suffix"));
1277 int maxlen = snprintf (nullptr, 0, format, INT_MAX);
1278 if (maxlen < 0)
1279 xalloc_die ();
1280 return maxlen;
1284 main (int argc, char **argv)
1286 int optc;
1288 initialize_main (&argc, &argv);
1289 set_program_name (argv[0]);
1290 setlocale (LC_ALL, "");
1291 bindtextdomain (PACKAGE, LOCALEDIR);
1292 textdomain (PACKAGE);
1294 atexit (close_stdout);
1296 global_argv = argv;
1297 controls = nullptr;
1298 control_used = 0;
1299 suppress_count = false;
1300 remove_files = true;
1301 suppress_matched = false;
1302 prefix = DEFAULT_PREFIX;
1304 while ((optc = getopt_long (argc, argv, "f:b:kn:sqz", longopts, nullptr))
1305 != -1)
1306 switch (optc)
1308 case 'f':
1309 prefix = optarg;
1310 break;
1312 case 'b':
1313 suffix = optarg;
1314 break;
1316 case 'k':
1317 remove_files = false;
1318 break;
1320 case 'n':
1321 digits = xdectoimax (optarg, 0, MIN (INT_MAX, IDX_MAX), "",
1322 _("invalid number"), 0);
1323 break;
1325 case 's':
1326 case 'q':
1327 suppress_count = true;
1328 break;
1330 case 'z':
1331 elide_empty_files = true;
1332 break;
1334 case SUPPRESS_MATCHED_OPTION:
1335 suppress_matched = true;
1336 break;
1338 case_GETOPT_HELP_CHAR;
1340 case_GETOPT_VERSION_CHAR (PROGRAM_NAME, AUTHORS);
1342 default:
1343 usage (EXIT_FAILURE);
1346 if (argc - optind < 2)
1348 if (argc <= optind)
1349 error (0, 0, _("missing operand"));
1350 else
1351 error (0, 0, _("missing operand after %s"), quote (argv[argc - 1]));
1352 usage (EXIT_FAILURE);
1355 idx_t prefix_len = strlen (prefix);
1356 idx_t max_digit_string_len
1357 = (suffix
1358 ? max_out (suffix)
1359 : MAX (INT_STRLEN_BOUND (int), digits));
1360 idx_t filename_size;
1361 if (ckd_add (&filename_size, prefix_len, max_digit_string_len + 1))
1362 xalloc_die ();
1363 filename_space = ximalloc (filename_size);
1365 set_input_file (argv[optind++]);
1367 parse_patterns (argc, optind, argv);
1370 int i;
1371 static int const sig[] =
1373 /* The usual suspects. */
1374 SIGALRM, SIGHUP, SIGINT, SIGPIPE, SIGQUIT, SIGTERM,
1375 #ifdef SIGPOLL
1376 SIGPOLL,
1377 #endif
1378 #ifdef SIGPROF
1379 SIGPROF,
1380 #endif
1381 #ifdef SIGVTALRM
1382 SIGVTALRM,
1383 #endif
1384 #ifdef SIGXCPU
1385 SIGXCPU,
1386 #endif
1387 #ifdef SIGXFSZ
1388 SIGXFSZ,
1389 #endif
1391 enum { nsigs = ARRAY_CARDINALITY (sig) };
1393 struct sigaction act;
1395 sigemptyset (&caught_signals);
1396 for (i = 0; i < nsigs; i++)
1398 sigaction (sig[i], nullptr, &act);
1399 if (act.sa_handler != SIG_IGN)
1400 sigaddset (&caught_signals, sig[i]);
1403 act.sa_handler = interrupt_handler;
1404 act.sa_mask = caught_signals;
1405 act.sa_flags = 0;
1407 for (i = 0; i < nsigs; i++)
1408 if (sigismember (&caught_signals, sig[i]))
1409 sigaction (sig[i], &act, nullptr);
1412 split_file ();
1414 if (close (STDIN_FILENO) != 0)
1416 error (0, errno, _("read error"));
1417 cleanup_fatal ();
1420 return EXIT_SUCCESS;
1423 void
1424 usage (int status)
1426 if (status != EXIT_SUCCESS)
1427 emit_try_help ();
1428 else
1430 printf (_("\
1431 Usage: %s [OPTION]... FILE PATTERN...\n\
1433 program_name);
1434 fputs (_("\
1435 Output pieces of FILE separated by PATTERN(s) to files 'xx00', 'xx01', ...,\n\
1436 and output byte counts of each piece to standard output.\n\
1437 "), stdout);
1438 fputs (_("\
1440 Read standard input if FILE is -\n\
1441 "), stdout);
1443 emit_mandatory_arg_note ();
1445 fputs (_("\
1446 -b, --suffix-format=FORMAT use sprintf FORMAT instead of %02d\n\
1447 -f, --prefix=PREFIX use PREFIX instead of 'xx'\n\
1448 -k, --keep-files do not remove output files on errors\n\
1449 "), stdout);
1450 fputs (_("\
1451 --suppress-matched suppress the lines matching PATTERN\n\
1452 "), stdout);
1453 fputs (_("\
1454 -n, --digits=DIGITS use specified number of digits instead of 2\n\
1455 -s, --quiet, --silent do not print counts of output file sizes\n\
1456 -z, --elide-empty-files suppress empty output files\n\
1457 "), stdout);
1458 fputs (HELP_OPTION_DESCRIPTION, stdout);
1459 fputs (VERSION_OPTION_DESCRIPTION, stdout);
1460 fputs (_("\
1462 Each PATTERN may be:\n\
1463 INTEGER copy up to but not including specified line number\n\
1464 /REGEXP/[OFFSET] copy up to but not including a matching line\n\
1465 %REGEXP%[OFFSET] skip to, but not including a matching line\n\
1466 {INTEGER} repeat the previous pattern specified number of times\n\
1467 {*} repeat the previous pattern as many times as possible\n\
1469 A line OFFSET is an integer optionally preceded by '+' or '-'\n\
1470 "), stdout);
1471 emit_ancillary_info (PROGRAM_NAME);
1473 exit (status);