.
[coreutils.git] / src / csplit.c
blobfd5ce30e7aa21c7236ec01276bd903b484438c94
1 /* csplit - split a file into sections determined by context lines
2 Copyright (C) 91, 1995-2003 Free Software Foundation, Inc.
4 This program is free software; you can redistribute it and/or modify
5 it under the terms of the GNU General Public License as published by
6 the Free Software Foundation; either version 2, or (at your option)
7 any later version.
9 This program is distributed in the hope that it will be useful,
10 but WITHOUT ANY WARRANTY; without even the implied warranty of
11 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 GNU General Public License for more details.
14 You should have received a copy of the GNU General Public License
15 along with this program; if not, write to the Free Software Foundation,
16 Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. */
18 /* Written by Stuart Kemp, cpsrk@groper.jcu.edu.au.
19 Modified by David MacKenzie, djm@gnu.ai.mit.edu. */
21 #include <config.h>
23 #include <stdio.h>
24 #include <getopt.h>
25 #include <sys/types.h>
26 #include <signal.h>
28 #include "system.h"
30 #include <regex.h>
32 #include "error.h"
33 #include "inttostr.h"
34 #include "safe-read.h"
35 #include "xstrtol.h"
/* Canonical name of this program, without any installation prefix
   such as `g'.  */
#define PROGRAM_NAME "csplit"

#define AUTHORS "Stuart Kemp", "David MacKenzie"

/* Increment size of area for control records.  */
#define ALLOC_SIZE 20

/* Output file name prefix used when -f/--prefix is not given.  */
#define DEFAULT_PREFIX "xx"
48 /* A compiled pattern arg. */
49 struct control
51 char *regexpr; /* Non-compiled regular expression. */
52 struct re_pattern_buffer re_compiled; /* Compiled regular expression. */
53 intmax_t offset; /* Offset from regexp to split at. */
54 uintmax_t lines_required; /* Number of lines required. */
55 uintmax_t repeat; /* Repeat count. */
56 int argnum; /* ARGV index. */
57 bool repeat_forever; /* True if `*' used as a repeat count. */
58 bool ignore; /* If true, produce no output (for regexp). */
/* Buffer tuning parameters.

   START_SIZE  initial size of the data area in a buffer;
   INCR_SIZE   granularity by which a larger data area is rounded up;
   CTRL_SIZE   number of lines recorded in each node of the line list.

   A DEBUG build substitutes some small values to test the algorithms.
   The two sets are alternatives: defining the same macro twice with
   different replacement lists (as a plain second #define would do) is
   a constraint violation.  */
#ifdef DEBUG
# define START_SIZE 200
# define INCR_SIZE 10
# define CTRL_SIZE 1
#else
# define START_SIZE 8191
# define INCR_SIZE 2048
# define CTRL_SIZE 80
#endif
/* A counted string: LEN bytes starting at STR.  The bytes are input
   lines held in a buffer's data area and are not NUL-terminated by
   the code that records them.  */
struct cstring
{
  size_t len;
  char *str;
};
84 /* Pointers to the beginnings of lines in the buffer area.
85 These structures are linked together if needed. */
86 struct line
88 unsigned int used; /* Number of offsets used in this struct. */
89 unsigned int insert_index; /* Next offset to use when inserting line. */
90 unsigned int retrieve_index; /* Next index to use when retrieving line. */
91 struct cstring starts[CTRL_SIZE]; /* Lines in the data area. */
92 struct line *next; /* Next in linked list. */
/* One chunk of buffered input: the raw bytes in BUFFER together with
   the list of line descriptors that point into those bytes.  Chunks
   still holding unprocessed lines are linked through NEXT.  */
struct buffer_record
{
  size_t bytes_alloc;		/* Capacity of BUFFER.  */
  size_t bytes_used;		/* Bytes of BUFFER holding data.  */
  uintmax_t start_line;		/* Line number of the first line in this chunk.  */
  uintmax_t first_available;	/* Line number of the first line not yet retrieved.  */
  size_t num_lines;		/* Number of lines recorded for this chunk.  */
  char *buffer;			/* The data area.  */
  struct line *line_start;	/* Head of the list of line descriptors.  */
  struct line *curr_line;	/* Descriptor batch currently in use.  */
  struct buffer_record *next;
};
/* Forward declarations for functions used before their definitions.  */
static void close_output_file (void);
static void create_output_file (void);
static void delete_all_files (void);
static void save_line_to_file (const struct cstring *line);
void usage (int status);

/* Name under which this program was invoked (argv[0]).  */
char *program_name;

/* File descriptor the input is read from; 0 is standard input.  */
static int input_desc = 0;

/* First buffer in the list of buffers with unprocessed lines.  */
static struct buffer_record *head = NULL;

/* Bytes of an incomplete trailing line, waiting for the next read.  */
static char *hold_area = NULL;

/* Number of bytes in `hold_area'.  */
static size_t hold_count = 0;

/* Line number of the last line recorded in any buffer so far.  */
static uintmax_t last_line_number = 0;

/* Line number of the line currently being examined.  */
static uintmax_t current_line = 0;

/* True once a read has returned end-of-file.  */
static bool have_read_eof = false;

/* Storage in which output file names are composed.  */
static char *filename_space = NULL;

/* Leading part of every output file name.  */
static char *prefix = NULL;

/* printf-style format for the numeric part of output file names,
   or NULL to use a zero-padded number of `digits' width.  */
static char *suffix = NULL;

/* Minimum number of digits in the numeric part of output file names.  */
static int digits = 2;

/* Number of output files created so far.  */
static unsigned int files_created = 0;

/* Number of bytes written to the current output file.  */
static uintmax_t bytes_written;

/* Stream for the current output file, or NULL if none is open.  */
static FILE *output_stream = NULL;

/* Name of the current output file.  */
static char *output_filename = NULL;

/* Perhaps it would be cleaner to pass arg values instead of indexes.  */
static char **global_argv;

/* If true, do not print the byte count of each output file.  */
static bool suppress_count;

/* If true, remove the output files when an error occurs.  */
static bool remove_files;

/* If true, remove every output file that ends up empty.  */
static bool elide_empty_files;

/* The parsed pattern arguments, which determine how to split the
   input file.  */
static struct control *controls;

/* Number of elements of `controls' in use.  */
static size_t control_used;
184 static struct option const longopts[] =
186 {"digits", required_argument, NULL, 'n'},
187 {"quiet", no_argument, NULL, 'q'},
188 {"silent", no_argument, NULL, 's'},
189 {"keep-files", no_argument, NULL, 'k'},
190 {"elide-empty-files", no_argument, NULL, 'z'},
191 {"prefix", required_argument, NULL, 'f'},
192 {"suffix-format", required_argument, NULL, 'b'},
193 {GETOPT_HELP_OPTION_DECL},
194 {GETOPT_VERSION_OPTION_DECL},
195 {NULL, 0, NULL, 0}
198 /* Optionally remove files created so far; then exit.
199 Called when an error detected. */
201 static void
202 cleanup (void)
204 close_output_file ();
206 if (remove_files)
207 delete_all_files ();
/* Run `cleanup' and terminate the program with a failure status.
   Does not return.  */

static void
cleanup_fatal (void)
{
  cleanup ();
  exit (EXIT_FAILURE);
}
217 static RETSIGTYPE
218 interrupt_handler (int sig)
220 #ifdef SA_NOCLDSTOP
221 struct sigaction sigact;
223 sigact.sa_handler = SIG_DFL;
224 sigemptyset (&sigact.sa_mask);
225 sigact.sa_flags = 0;
226 sigaction (sig, &sigact, NULL);
227 #else
228 signal (sig, SIG_DFL);
229 #endif
230 cleanup ();
231 raise (sig);
234 /* Keep track of NUM bytes of a partial line in buffer START.
235 These bytes will be retrieved later when another large buffer is read.
236 It is not necessary to create a new buffer for these bytes; instead,
237 we keep a pointer to the existing buffer. This buffer *is* on the
238 free list, and when the next buffer is obtained from this list
239 (even if it is this one), these bytes will be placed at the
240 start of the new buffer. */
242 static void
243 save_to_hold_area (char *start, size_t num)
245 hold_area = start;
246 hold_count = num;
249 /* Read up to MAX_N_BYTES bytes from the input stream into DEST.
250 Return the number of bytes read. */
252 static size_t
253 read_input (char *dest, size_t max_n_bytes)
255 size_t bytes_read;
257 if (max_n_bytes == 0)
258 return 0;
260 bytes_read = safe_read (input_desc, dest, max_n_bytes);
262 if (bytes_read == 0)
263 have_read_eof = true;
265 if (bytes_read == SAFE_READ_ERROR)
267 error (0, errno, _("read error"));
268 cleanup_fatal ();
271 return bytes_read;
274 /* Initialize existing line record P. */
276 static void
277 clear_line_control (struct line *p)
279 p->used = 0;
280 p->insert_index = 0;
281 p->retrieve_index = 0;
284 /* Return a new, initialized line record. */
286 static struct line *
287 new_line_control (void)
289 struct line *p = xmalloc (sizeof *p);
291 p->next = NULL;
292 clear_line_control (p);
294 return p;
297 /* Record LINE_START, which is the address of the start of a line
298 of length LINE_LEN in the large buffer, in the lines buffer of B. */
300 static void
301 keep_new_line (struct buffer_record *b, char *line_start, size_t line_len)
303 struct line *l;
305 /* If there is no existing area to keep line info, get some. */
306 if (b->line_start == NULL)
307 b->line_start = b->curr_line = new_line_control ();
309 /* If existing area for lines is full, get more. */
310 if (b->curr_line->used == CTRL_SIZE)
312 b->curr_line->next = new_line_control ();
313 b->curr_line = b->curr_line->next;
316 l = b->curr_line;
318 /* Record the start of the line, and update counters. */
319 l->starts[l->insert_index].str = line_start;
320 l->starts[l->insert_index].len = line_len;
321 l->used++;
322 l->insert_index++;
325 /* Scan the buffer in B for newline characters
326 and record the line start locations and lengths in B.
327 Return the number of lines found in this buffer.
329 There may be an incomplete line at the end of the buffer;
330 a pointer is kept to this area, which will be used when
331 the next buffer is filled. */
333 static size_t
334 record_line_starts (struct buffer_record *b)
336 char *line_start; /* Start of current line. */
337 char *line_end; /* End of each line found. */
338 size_t bytes_left; /* Length of incomplete last line. */
339 size_t lines; /* Number of lines found. */
340 size_t line_length; /* Length of each line found. */
342 if (b->bytes_used == 0)
343 return 0;
345 lines = 0;
346 line_start = b->buffer;
347 bytes_left = b->bytes_used;
349 for (;;)
351 line_end = memchr (line_start, '\n', bytes_left);
352 if (line_end == NULL)
353 break;
354 line_length = line_end - line_start + 1;
355 keep_new_line (b, line_start, line_length);
356 bytes_left -= line_length;
357 line_start = line_end + 1;
358 lines++;
361 /* Check for an incomplete last line. */
362 if (bytes_left)
364 if (have_read_eof)
366 keep_new_line (b, line_start, bytes_left);
367 lines++;
369 else
370 save_to_hold_area (line_start, bytes_left);
373 b->num_lines = lines;
374 b->first_available = b->start_line = last_line_number + 1;
375 last_line_number += lines;
377 return lines;
380 /* Return a new buffer with room to store SIZE bytes, plus
381 an extra byte for safety. */
383 static struct buffer_record *
384 create_new_buffer (size_t size)
386 struct buffer_record *new_buffer = xmalloc (sizeof *new_buffer);
388 new_buffer->buffer = xmalloc (size + 1);
390 new_buffer->bytes_alloc = size;
391 new_buffer->line_start = new_buffer->curr_line = NULL;
393 return new_buffer;
396 /* Return a new buffer of at least MINSIZE bytes. If a buffer of at
397 least that size is currently free, use it, otherwise create a new one. */
399 static struct buffer_record *
400 get_new_buffer (size_t min_size)
402 struct buffer_record *new_buffer; /* Buffer to return. */
403 size_t alloc_size; /* Actual size that will be requested. */
405 alloc_size = START_SIZE;
406 if (alloc_size < min_size)
408 size_t s = min_size - alloc_size + INCR_SIZE - 1;
409 alloc_size += s - s % INCR_SIZE;
412 new_buffer = create_new_buffer (alloc_size);
414 new_buffer->num_lines = 0;
415 new_buffer->bytes_used = 0;
416 new_buffer->start_line = new_buffer->first_available = last_line_number + 1;
417 new_buffer->next = NULL;
419 return new_buffer;
422 static void
423 free_buffer (struct buffer_record *buf)
425 free (buf->buffer);
428 /* Append buffer BUF to the linked list of buffers that contain
429 some data yet to be processed. */
431 static void
432 save_buffer (struct buffer_record *buf)
434 struct buffer_record *p;
436 buf->next = NULL;
437 buf->curr_line = buf->line_start;
439 if (head == NULL)
440 head = buf;
441 else
443 for (p = head; p->next; p = p->next)
444 /* Do nothing. */ ;
445 p->next = buf;
449 /* Fill a buffer of input.
451 Set the initial size of the buffer to a default.
452 Fill the buffer (from the hold area and input stream)
453 and find the individual lines.
454 If no lines are found (the buffer is too small to hold the next line),
455 release the current buffer (whose contents would have been put in the
456 hold area) and repeat the process with another large buffer until at least
457 one entire line has been read.
459 Return true if a new buffer was obtained, otherwise false
460 (in which case end-of-file must have been encountered). */
462 static bool
463 load_buffer (void)
465 struct buffer_record *b;
466 size_t bytes_wanted = START_SIZE; /* Minimum buffer size. */
467 size_t bytes_avail; /* Size of new buffer created. */
468 size_t lines_found; /* Number of lines in this new buffer. */
469 char *p; /* Place to load into buffer. */
471 if (have_read_eof)
472 return false;
474 /* We must make the buffer at least as large as the amount of data
475 in the partial line left over from the last call. */
476 if (bytes_wanted < hold_count)
477 bytes_wanted = hold_count;
481 b = get_new_buffer (bytes_wanted);
482 bytes_avail = b->bytes_alloc; /* Size of buffer returned. */
483 p = b->buffer;
485 /* First check the `holding' area for a partial line. */
486 if (hold_count)
488 if (p != hold_area)
489 memcpy (p, hold_area, hold_count);
490 p += hold_count;
491 b->bytes_used += hold_count;
492 bytes_avail -= hold_count;
493 hold_count = 0;
496 b->bytes_used += read_input (p, bytes_avail);
498 lines_found = record_line_starts (b);
499 bytes_wanted = b->bytes_alloc * 2;
500 if (!lines_found)
501 free_buffer (b);
503 while (!lines_found && !have_read_eof);
505 if (lines_found)
506 save_buffer (b);
508 return lines_found != 0;
511 /* Return the line number of the first line that has not yet been retrieved. */
513 static uintmax_t
514 get_first_line_in_buffer (void)
516 if (head == NULL && !load_buffer ())
517 error (EXIT_FAILURE, errno, _("input disappeared"));
519 return head->first_available;
522 /* Return a pointer to the logical first line in the buffer and make the
523 next line the logical first line.
524 Return NULL if there is no more input. */
526 static struct cstring *
527 remove_line (void)
529 /* If non-NULL, this is the buffer for which the previous call
530 returned the final line. So now, presuming that line has been
531 processed, we can free the buffer and reset this pointer. */
532 static struct buffer_record *prev_buf = NULL;
534 struct cstring *line; /* Return value. */
535 struct line *l; /* For convenience. */
537 if (prev_buf)
539 free_buffer (prev_buf);
540 prev_buf = NULL;
543 if (head == NULL && !load_buffer ())
544 return NULL;
546 if (current_line < head->first_available)
547 current_line = head->first_available;
549 ++(head->first_available);
551 l = head->curr_line;
553 line = &l->starts[l->retrieve_index];
555 /* Advance index to next line. */
556 if (++l->retrieve_index == l->used)
558 /* Go on to the next line record. */
559 head->curr_line = l->next;
560 if (head->curr_line == NULL || head->curr_line->used == 0)
562 /* Go on to the next data block.
563 but first record the current one so we can free it
564 once the line we're returning has been processed. */
565 prev_buf = head;
566 head = head->next;
570 return line;
573 /* Search the buffers for line LINENUM, reading more input if necessary.
574 Return a pointer to the line, or NULL if it is not found in the file. */
576 static struct cstring *
577 find_line (uintmax_t linenum)
579 struct buffer_record *b;
581 if (head == NULL && !load_buffer ())
582 return NULL;
584 if (linenum < head->start_line)
585 return NULL;
587 for (b = head;;)
589 if (linenum < b->start_line + b->num_lines)
591 /* The line is in this buffer. */
592 struct line *l;
593 size_t offset; /* How far into the buffer the line is. */
595 l = b->line_start;
596 offset = linenum - b->start_line;
597 /* Find the control record. */
598 while (offset >= CTRL_SIZE)
600 l = l->next;
601 offset -= CTRL_SIZE;
603 return &l->starts[offset];
605 if (b->next == NULL && !load_buffer ())
606 return NULL;
607 b = b->next; /* Try the next data block. */
611 /* Return true if at least one more line is available for input. */
613 static bool
614 no_more_lines (void)
616 return find_line (current_line + 1) == NULL;
619 /* Set the name of the input file to NAME and open it. */
621 static void
622 set_input_file (const char *name)
624 if (STREQ (name, "-"))
625 input_desc = 0;
626 else
628 input_desc = open (name, O_RDONLY);
629 if (input_desc < 0)
630 error (EXIT_FAILURE, errno, "%s", name);
634 /* Write all lines from the beginning of the buffer up to, but
635 not including, line LAST_LINE, to the current output file.
636 If IGNORE is true, do not output lines selected here.
637 ARGNUM is the index in ARGV of the current pattern. */
639 static void
640 write_to_file (uintmax_t last_line, bool ignore, int argnum)
642 struct cstring *line;
643 uintmax_t first_line; /* First available input line. */
644 uintmax_t lines; /* Number of lines to output. */
645 uintmax_t i;
647 first_line = get_first_line_in_buffer ();
649 if (first_line > last_line)
651 error (0, 0, _("%s: line number out of range"), global_argv[argnum]);
652 cleanup_fatal ();
655 lines = last_line - first_line;
657 for (i = 0; i < lines; i++)
659 line = remove_line ();
660 if (line == NULL)
662 error (0, 0, _("%s: line number out of range"), global_argv[argnum]);
663 cleanup_fatal ();
665 if (!ignore)
666 save_line_to_file (line);
/* Write every remaining input line to the current output file.  */

static void
dump_rest_of_file (void)
{
  for (;;)
    {
      struct cstring *line = remove_line ();

      if (line == NULL)
	break;
      save_line_to_file (line);
    }
}
681 /* Handle an attempt to read beyond EOF under the control of record P,
682 on iteration REPETITION if nonzero. */
684 static void
685 handle_line_error (const struct control *p, uintmax_t repetition)
687 char buf[INT_BUFSIZE_BOUND (uintmax_t)];
689 fprintf (stderr, _("%s: `%s': line number out of range"),
690 program_name, umaxtostr (p->lines_required, buf));
691 if (repetition)
692 fprintf (stderr, _(" on repetition %s\n"), umaxtostr (repetition, buf));
693 else
694 fprintf (stderr, "\n");
696 cleanup_fatal ();
699 /* Determine the line number that marks the end of this file,
700 then get those lines and save them to the output file.
701 P is the control record.
702 REPETITION is the repetition number. */
704 static void
705 process_line_count (const struct control *p, uintmax_t repetition)
707 uintmax_t linenum;
708 uintmax_t last_line_to_save = p->lines_required * (repetition + 1);
709 struct cstring *line;
711 create_output_file ();
713 linenum = get_first_line_in_buffer ();
715 while (linenum++ < last_line_to_save)
717 line = remove_line ();
718 if (line == NULL)
719 handle_line_error (p, repetition);
720 save_line_to_file (line);
723 close_output_file ();
725 /* Ensure that the line number specified is not 1 greater than
726 the number of lines in the file. */
727 if (no_more_lines ())
728 handle_line_error (p, repetition);
731 static void
732 regexp_error (struct control *p, uintmax_t repetition, bool ignore)
734 fprintf (stderr, _("%s: `%s': match not found"),
735 program_name, global_argv[p->argnum]);
737 if (repetition)
739 char buf[INT_BUFSIZE_BOUND (uintmax_t)];
740 fprintf (stderr, _(" on repetition %s\n"), umaxtostr (repetition, buf));
742 else
743 fprintf (stderr, "\n");
745 if (!ignore)
747 dump_rest_of_file ();
748 close_output_file ();
750 cleanup_fatal ();
753 /* Read the input until a line matches the regexp in P, outputting
754 it unless P->IGNORE is true.
755 REPETITION is this repeat-count; 0 means the first time. */
757 static void
758 process_regexp (struct control *p, uintmax_t repetition)
760 struct cstring *line; /* From input file. */
761 size_t line_len; /* To make "$" in regexps work. */
762 uintmax_t break_line; /* First line number of next file. */
763 bool ignore = p->ignore; /* If true, skip this section. */
764 int ret;
766 if (!ignore)
767 create_output_file ();
769 /* If there is no offset for the regular expression, or
770 it is positive, then it is not necessary to buffer the lines. */
772 if (p->offset >= 0)
774 for (;;)
776 line = find_line (++current_line);
777 if (line == NULL)
779 if (p->repeat_forever)
781 if (!ignore)
783 dump_rest_of_file ();
784 close_output_file ();
786 exit (EXIT_SUCCESS);
788 else
789 regexp_error (p, repetition, ignore);
791 line_len = line->len;
792 if (line->str[line_len - 1] == '\n')
793 line_len--;
794 ret = re_search (&p->re_compiled, line->str, line_len,
795 0, line_len, NULL);
796 if (ret == -2)
798 error (0, 0, _("error in regular expression search"));
799 cleanup_fatal ();
801 if (ret == -1)
803 line = remove_line ();
804 if (!ignore)
805 save_line_to_file (line);
807 else
808 break;
811 else
813 /* Buffer the lines. */
814 for (;;)
816 line = find_line (++current_line);
817 if (line == NULL)
819 if (p->repeat_forever)
821 if (!ignore)
823 dump_rest_of_file ();
824 close_output_file ();
826 exit (EXIT_SUCCESS);
828 else
829 regexp_error (p, repetition, ignore);
831 line_len = line->len;
832 if (line->str[line_len - 1] == '\n')
833 line_len--;
834 ret = re_search (&p->re_compiled, line->str, line_len,
835 0, line_len, NULL);
836 if (ret == -2)
838 error (0, 0, _("error in regular expression search"));
839 cleanup_fatal ();
841 if (ret >= 0)
842 break;
846 /* Account for any offset from this regexp. */
847 break_line = current_line + p->offset;
849 write_to_file (break_line, ignore, p->argnum);
851 if (!ignore)
852 close_output_file ();
854 if (p->offset > 0)
855 current_line = break_line;
858 /* Split the input file according to the control records we have built. */
860 static void
861 split_file (void)
863 size_t i;
865 for (i = 0; i < control_used; i++)
867 uintmax_t j;
868 if (controls[i].regexpr)
870 for (j = 0; (controls[i].repeat_forever
871 || j <= controls[i].repeat); j++)
872 process_regexp (&controls[i], j);
874 else
876 for (j = 0; (controls[i].repeat_forever
877 || j <= controls[i].repeat); j++)
878 process_line_count (&controls[i], j);
882 create_output_file ();
883 dump_rest_of_file ();
884 close_output_file ();
887 /* Return the name of output file number NUM. */
889 static char *
890 make_filename (unsigned int num)
892 strcpy (filename_space, prefix);
893 if (suffix)
894 sprintf (filename_space+strlen(prefix), suffix, num);
895 else
896 sprintf (filename_space+strlen(prefix), "%0*u", digits, num);
897 return filename_space;
900 /* Create the next output file. */
902 static void
903 create_output_file (void)
905 output_filename = make_filename (files_created);
906 output_stream = fopen (output_filename, "w");
907 if (output_stream == NULL)
909 error (0, errno, "%s", output_filename);
910 cleanup_fatal ();
912 files_created++;
913 bytes_written = 0;
916 /* Delete all the files we have created. */
918 static void
919 delete_all_files (void)
921 unsigned int i;
923 for (i = 0; i < files_created; i++)
925 const char *name = make_filename (i);
926 if (unlink (name))
927 error (0, errno, "%s", name);
931 /* Close the current output file and print the count
932 of characters in this file. */
934 static void
935 close_output_file (void)
937 if (output_stream)
939 if (ferror (output_stream))
941 error (0, 0, _("write error for `%s'"), output_filename);
942 output_stream = NULL;
943 cleanup_fatal ();
945 if (fclose (output_stream) != 0)
947 error (0, errno, "%s", output_filename);
948 output_stream = NULL;
949 cleanup_fatal ();
951 if (bytes_written == 0 && elide_empty_files)
953 if (unlink (output_filename))
954 error (0, errno, "%s", output_filename);
955 files_created--;
957 else
959 if (!suppress_count)
961 char buf[INT_BUFSIZE_BOUND (uintmax_t)];
962 fprintf (stdout, "%s\n", umaxtostr (bytes_written, buf));
965 output_stream = NULL;
969 /* Save line LINE to the output file and
970 increment the character count for the current file. */
972 static void
973 save_line_to_file (const struct cstring *line)
975 fwrite (line->str, sizeof (char), line->len, output_stream);
976 bytes_written += line->len;
979 /* Return a new, initialized control record. */
981 static struct control *
982 new_control_record (void)
984 static size_t control_allocated = 0; /* Total space allocated. */
985 struct control *p;
987 if (control_used == control_allocated)
988 controls = x2nrealloc (controls, &control_allocated, sizeof *controls);
989 p = &controls[control_used++];
990 p->regexpr = NULL;
991 p->repeat = 0;
992 p->repeat_forever = false;
993 p->lines_required = 0;
994 p->offset = 0;
995 return p;
998 /* Check if there is a numeric offset after a regular expression.
999 STR is the entire command line argument.
1000 P is the control record for this regular expression.
1001 NUM is the numeric part of STR. */
1003 static void
1004 check_for_offset (struct control *p, const char *str, const char *num)
1006 if (xstrtoimax (num, NULL, 10, &p->offset, "") != LONGINT_OK)
1007 error (EXIT_FAILURE, 0, _("%s: integer expected after delimiter"), str);
1010 /* Given that the first character of command line arg STR is '{',
1011 make sure that the rest of the string is a valid repeat count
1012 and store its value in P.
1013 ARGNUM is the ARGV index of STR. */
1015 static void
1016 parse_repeat_count (int argnum, struct control *p, char *str)
1018 uintmax_t val;
1019 char *end;
1021 end = str + strlen (str) - 1;
1022 if (*end != '}')
1023 error (EXIT_FAILURE, 0, _("%s: `}' is required in repeat count"), str);
1024 *end = '\0';
1026 if (str+1 == end-1 && *(str+1) == '*')
1027 p->repeat_forever = true;
1028 else
1030 if (xstrtoumax (str + 1, NULL, 10, &val, "") != LONGINT_OK)
1032 error (EXIT_FAILURE, 0,
1033 _("%s}: integer required between `{' and `}'"),
1034 global_argv[argnum]);
1036 p->repeat = val;
1039 *end = '}';
1042 /* Extract the regular expression from STR and check for a numeric offset.
1043 STR should start with the regexp delimiter character.
1044 Return a new control record for the regular expression.
1045 ARGNUM is the ARGV index of STR.
1046 Unless IGNORE is true, mark these lines for output. */
1048 static struct control *
1049 extract_regexp (int argnum, bool ignore, char *str)
1051 size_t len; /* Number of bytes in this regexp. */
1052 char delim = *str;
1053 char *closing_delim;
1054 struct control *p;
1055 const char *err;
1057 closing_delim = strrchr (str + 1, delim);
1058 if (closing_delim == NULL)
1059 error (EXIT_FAILURE, 0,
1060 _("%s: closing delimiter `%c' missing"), str, delim);
1062 len = closing_delim - str - 1;
1063 p = new_control_record ();
1064 p->argnum = argnum;
1065 p->ignore = ignore;
1067 p->regexpr = xmalloc (len + 1);
1068 strncpy (p->regexpr, str + 1, len);
1069 p->re_compiled.allocated = len * 2;
1070 p->re_compiled.buffer = xmalloc (p->re_compiled.allocated);
1071 p->re_compiled.fastmap = xmalloc (1 << CHAR_BIT);
1072 p->re_compiled.translate = 0;
1073 err = re_compile_pattern (p->regexpr, len, &p->re_compiled);
1074 if (err)
1076 error (0, 0, _("%s: invalid regular expression: %s"), str, err);
1077 cleanup_fatal ();
1080 if (closing_delim[1])
1081 check_for_offset (p, str, closing_delim + 1);
1083 return p;
1086 /* Extract the break patterns from args START through ARGC - 1 of ARGV.
1087 After each pattern, check if the next argument is a repeat count. */
1089 static void
1090 parse_patterns (int argc, int start, char **argv)
1092 int i; /* Index into ARGV. */
1093 struct control *p; /* New control record created. */
1094 uintmax_t val;
1095 static uintmax_t last_val = 0;
1097 for (i = start; i < argc; i++)
1099 if (*argv[i] == '/' || *argv[i] == '%')
1101 p = extract_regexp (i, *argv[i] == '%', argv[i]);
1103 else
1105 p = new_control_record ();
1106 p->argnum = i;
1108 if (xstrtoumax (argv[i], NULL, 10, &val, "") != LONGINT_OK)
1109 error (EXIT_FAILURE, 0, _("%s: invalid pattern"), argv[i]);
1110 if (val == 0)
1111 error (EXIT_FAILURE, 0,
1112 _("%s: line number must be greater than zero"),
1113 argv[i]);
1114 if (val < last_val)
1116 char buf[INT_BUFSIZE_BOUND (uintmax_t)];
1117 error (EXIT_FAILURE, 0,
1118 _("line number `%s' is smaller than preceding line number, %s"),
1119 argv[i], umaxtostr (last_val, buf));
1122 if (val == last_val)
1123 error (0, 0,
1124 _("warning: line number `%s' is the same as preceding line number"),
1125 argv[i]);
1127 last_val = val;
1129 p->lines_required = val;
1132 if (i + 1 < argc && *argv[i + 1] == '{')
1134 /* We have a repeat count. */
1135 i++;
1136 parse_repeat_count (i, p, argv[i]);
/* Skip the printf flag characters ('-', '+', ' ' and '#') at
   *FORMAT_PTR, leaving *FORMAT_PTR on the first character that is not
   one of them.  Return a bit mask describing the flags seen: bit 0
   for '+' or ' ', bit 1 for '#' (which allows for a 0x prefix
   preceding an `x' conversion).  The caller adds the mask value to
   its output length estimate.  */

static unsigned int
get_format_flags (char **format_ptr)
{
  unsigned int count = 0;

  for (;;)
    {
      char c = **format_ptr;

      if (c == '+' || c == ' ')
	count |= 1;
      else if (c == '#')
	count |= 2;
      else if (c != '-')
	return count;		/* Not a flag; also ends at the NUL.  */

      (*format_ptr)++;
    }
}
1169 static size_t
1170 get_format_width (char **format_ptr)
1172 unsigned long int val = 0;
1174 if (ISDIGIT (**format_ptr)
1175 && (xstrtoul (*format_ptr, format_ptr, 10, &val, NULL) != LONGINT_OK
1176 || SIZE_MAX < val))
1177 error (EXIT_FAILURE, 0, _("invalid format width"));
1179 /* Allow for enough octal digits to represent the value of UINT_MAX,
1180 even if the field width is less than that. */
1181 return MAX (val, (sizeof (unsigned int) * CHAR_BIT + 2) / 3);
1184 static size_t
1185 get_format_prec (char **format_ptr)
1187 if (**format_ptr != '.')
1188 return 0;
1189 (*format_ptr)++;
1191 if (! ISDIGIT (**format_ptr))
1192 return 0;
1193 else
1195 unsigned long int val;
1196 if (xstrtoul (*format_ptr, format_ptr, 10, &val, NULL) != LONGINT_OK
1197 || SIZE_MAX < val)
1198 error (EXIT_FAILURE, 0, _("invalid format precision"));
1199 return val;
/* Consume the conversion character at *FORMAT_PTR and check that it
   is one of the integer conversions d, i, o, u, x or X.  A missing
   or different conversion character is a fatal error; an unprintable
   one is shown as an octal escape in the diagnostic.  */

static void
get_format_conv_type (char **format_ptr)
{
  unsigned char ch = *(*format_ptr)++;

  if (ch == 0)
    error (EXIT_FAILURE, 0, _("missing conversion specifier in suffix"));
  else if (strchr ("diouxX", ch) == NULL)
    {
      if (ISPRINT (ch))
	error (EXIT_FAILURE, 0,
	       _("invalid conversion specifier in suffix: %c"), ch);
      else
	error (EXIT_FAILURE, 0,
	       _("invalid conversion specifier in suffix: \\%.3o"), ch);
    }
}
/* Validate the suffix format FORMAT and return an upper bound on the
   number of characters that formatting one unsigned int with it can
   produce.  FORMAT must contain exactly one conversion specification
   (`%%' stands for a literal percent sign); none or more than one is
   a fatal error.  */

static size_t
max_out (char *format)
{
  size_t out_count = 0;
  bool percent = false;		/* True once a conversion has been seen.  */

  while (*format)
    {
      char c = *format++;

      /* An ordinary character, or `%%': one character of output.  */
      if (c != '%')
	{
	  out_count++;
	  continue;
	}
      if (*format == '%')
	{
	  format++;
	  out_count++;
	  continue;
	}

      if (percent)
	error (EXIT_FAILURE, 0,
	       _("too many %% conversion specifications in suffix"));
      percent = true;

      out_count += get_format_flags (&format);
      {
	size_t width = get_format_width (&format);
	size_t prec = get_format_prec (&format);

	out_count += MAX (width, prec);
      }
      get_format_conv_type (&format);
    }

  if (! percent)
    error (EXIT_FAILURE, 0,
	   _("missing %% conversion specification in suffix"));

  return out_count;
}
1272 main (int argc, char **argv)
1274 int optc;
1275 unsigned long int val;
1276 #ifdef SA_NOCLDSTOP
1277 struct sigaction oldact, newact;
1278 #endif
1280 initialize_main (&argc, &argv);
1281 program_name = argv[0];
1282 setlocale (LC_ALL, "");
1283 bindtextdomain (PACKAGE, LOCALEDIR);
1284 textdomain (PACKAGE);
1286 atexit (close_stdout);
1288 global_argv = argv;
1289 controls = NULL;
1290 control_used = 0;
1291 suppress_count = false;
1292 remove_files = true;
1293 prefix = DEFAULT_PREFIX;
1295 /* Change the way xmalloc and xrealloc fail. */
1296 xalloc_fail_func = cleanup;
1298 #ifdef SA_NOCLDSTOP
1299 newact.sa_handler = interrupt_handler;
1300 sigemptyset (&newact.sa_mask);
1301 newact.sa_flags = 0;
1303 sigaction (SIGHUP, NULL, &oldact);
1304 if (oldact.sa_handler != SIG_IGN)
1305 sigaction (SIGHUP, &newact, NULL);
1307 sigaction (SIGINT, NULL, &oldact);
1308 if (oldact.sa_handler != SIG_IGN)
1309 sigaction (SIGINT, &newact, NULL);
1311 sigaction (SIGQUIT, NULL, &oldact);
1312 if (oldact.sa_handler != SIG_IGN)
1313 sigaction (SIGQUIT, &newact, NULL);
1315 sigaction (SIGTERM, NULL, &oldact);
1316 if (oldact.sa_handler != SIG_IGN)
1317 sigaction (SIGTERM, &newact, NULL);
1318 #else
1319 if (signal (SIGHUP, SIG_IGN) != SIG_IGN)
1320 signal (SIGHUP, interrupt_handler);
1321 if (signal (SIGINT, SIG_IGN) != SIG_IGN)
1322 signal (SIGINT, interrupt_handler);
1323 if (signal (SIGQUIT, SIG_IGN) != SIG_IGN)
1324 signal (SIGQUIT, interrupt_handler);
1325 if (signal (SIGTERM, SIG_IGN) != SIG_IGN)
1326 signal (SIGTERM, interrupt_handler);
1327 #endif
1329 while ((optc = getopt_long (argc, argv, "f:b:kn:sqz", longopts, NULL)) != -1)
1330 switch (optc)
1332 case 0:
1333 break;
1335 case 'f':
1336 prefix = optarg;
1337 break;
1339 case 'b':
1340 suffix = optarg;
1341 break;
1343 case 'k':
1344 remove_files = false;
1345 break;
1347 case 'n':
1348 if (xstrtoul (optarg, NULL, 10, &val, "") != LONGINT_OK
1349 || val > INT_MAX)
1350 error (EXIT_FAILURE, 0, _("%s: invalid number"), optarg);
1351 digits = (int) val;
1352 break;
1354 case 's':
1355 case 'q':
1356 suppress_count = true;
1357 break;
1359 case 'z':
1360 elide_empty_files = true;
1361 break;
1363 case_GETOPT_HELP_CHAR;
1365 case_GETOPT_VERSION_CHAR (PROGRAM_NAME, AUTHORS);
1367 default:
1368 usage (EXIT_FAILURE);
1371 if (argc - optind < 2)
1373 error (0, 0, _("too few arguments"));
1374 usage (EXIT_FAILURE);
1377 if (suffix)
1378 filename_space = xmalloc (strlen (prefix) + max_out (suffix) + 2);
1379 else
1380 filename_space = xmalloc (strlen (prefix) + digits + 2);
1382 set_input_file (argv[optind++]);
1384 parse_patterns (argc, optind, argv);
1386 split_file ();
1388 if (close (input_desc) < 0)
1390 error (0, errno, _("read error"));
1391 cleanup_fatal ();
1394 exit (EXIT_SUCCESS);
1397 void
1398 usage (int status)
1400 if (status != 0)
1401 fprintf (stderr, _("Try `%s --help' for more information.\n"),
1402 program_name);
1403 else
1405 printf (_("\
1406 Usage: %s [OPTION]... FILE PATTERN...\n\
1408 program_name);
1409 fputs (_("\
1410 Output pieces of FILE separated by PATTERN(s) to files `xx01', `xx02', ...,\n\
1411 and output byte counts of each piece to standard output.\n\
1413 "), stdout);
1414 fputs (_("\
1415 Mandatory arguments to long options are mandatory for short options too.\n\
1416 "), stdout);
1417 fputs (_("\
1418 -b, --suffix-format=FORMAT use sprintf FORMAT instead of %02d\n\
1419 -f, --prefix=PREFIX use PREFIX instead of `xx'\n\
1420 -k, --keep-files do not remove output files on errors\n\
1421 "), stdout);
1422 fputs (_("\
1423 -n, --digits=DIGITS use specified number of digits instead of 2\n\
1424 -s, --quiet, --silent do not print counts of output file sizes\n\
1425 -z, --elide-empty-files remove empty output files\n\
1426 "), stdout);
1427 fputs (HELP_OPTION_DESCRIPTION, stdout);
1428 fputs (VERSION_OPTION_DESCRIPTION, stdout);
1429 fputs (_("\
1431 Read standard input if FILE is -. Each PATTERN may be:\n\
1432 "), stdout);
1433 fputs (_("\
1435 INTEGER copy up to but not including specified line number\n\
1436 /REGEXP/[OFFSET] copy up to but not including a matching line\n\
1437 %REGEXP%[OFFSET] skip to, but not including a matching line\n\
1438 {INTEGER} repeat the previous pattern specified number of times\n\
1439 {*} repeat the previous pattern as many times as possible\n\
1441 A line OFFSET is a required `+' or `-' followed by a positive integer.\n\
1442 "), stdout);
1443 printf (_("\nReport bugs to <%s>.\n"), PACKAGE_BUGREPORT);
1445 exit (status == 0 ? EXIT_SUCCESS : EXIT_FAILURE);