*** empty log message ***
[coreutils.git] / src / csplit.c
blobebc930686e24845cee28f12cb80123a0339dce72
1 /* csplit - split a file into sections determined by context lines
2 Copyright (C) 91, 1995-2002 Free Software Foundation, Inc.
4 This program is free software; you can redistribute it and/or modify
5 it under the terms of the GNU General Public License as published by
6 the Free Software Foundation; either version 2, or (at your option)
7 any later version.
9 This program is distributed in the hope that it will be useful,
10 but WITHOUT ANY WARRANTY; without even the implied warranty of
11 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 GNU General Public License for more details.
14 You should have received a copy of the GNU General Public License
15 along with this program; if not, write to the Free Software Foundation,
16 Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. */
18 /* Written by Stuart Kemp, cpsrk@groper.jcu.edu.au.
19 Modified by David MacKenzie, djm@gnu.ai.mit.edu. */
21 #include <config.h>
23 #include <stdio.h>
24 #include <getopt.h>
25 #include <sys/types.h>
26 #include <signal.h>
28 #include "system.h"
29 #include "closeout.h"
31 #include <regex.h>
33 #include "error.h"
34 #include "human.h"
35 #include "safe-read.h"
36 #include "xstrtol.h"
38 /* The official name of this program (e.g., no `g' prefix). */
39 #define PROGRAM_NAME "csplit"
41 #define AUTHORS N_ ("Stuart Kemp and David MacKenzie")
43 #ifdef STDC_HEADERS
44 # include <stdlib.h>
45 #endif
47 #ifndef TRUE
48 # define FALSE 0
49 # define TRUE 1
50 #endif
52 /* Increment size of area for control records. */
53 #define ALLOC_SIZE 20
55 /* The default prefix for output file names. */
56 #define DEFAULT_PREFIX "xx"
58 typedef int boolean;
60 /* A compiled pattern arg. */
61 struct control
63 char *regexpr; /* Non-compiled regular expression. */
64 struct re_pattern_buffer re_compiled; /* Compiled regular expression. */
65 int offset; /* Offset from regexp to split at. */
66 uintmax_t lines_required; /* Number of lines required. */
67 uintmax_t repeat; /* Repeat count. */
68 int repeat_forever; /* Non-zero if `*' used as a repeat count. */
69 int argnum; /* ARGV index. */
70 boolean ignore; /* If true, produce no output (for regexp). */
/* Buffer tuning parameters.
   START_SIZE is the initial size of the data area in buffers,
   INCR_SIZE is the increment size for the data area, and
   CTRL_SIZE is the number of lines kept in each node of the line list.

   When DEBUG is defined, some small values are used instead in order to
   test the algorithms.  The two sets are mutually exclusive so that no
   macro is ever defined twice with different values.  */
#ifdef DEBUG
# define START_SIZE 200
# define INCR_SIZE 10
# define CTRL_SIZE 1
#else
# define START_SIZE 8191
# define INCR_SIZE 2048
# define CTRL_SIZE 80
#endif
/* A counted string: STR points at LEN characters and is not required
   to be NUL-terminated.  */
struct cstring
{
  int len;			/* Number of characters at STR.  */
  char *str;			/* First character.  */
};
96 /* Pointers to the beginnings of lines in the buffer area.
97 These structures are linked together if needed. */
98 struct line
100 unsigned used; /* Number of offsets used in this struct. */
101 unsigned insert_index; /* Next offset to use when inserting line. */
102 unsigned retrieve_index; /* Next index to use when retrieving line. */
103 struct cstring starts[CTRL_SIZE]; /* Lines in the data area. */
104 struct line *next; /* Next in linked list. */
/* A block of input: the raw data area together with the chain of
   line tables that index the individual lines inside it.  */
struct buffer_record
{
  /* Capacity of BUFFER, in bytes.  */
  unsigned bytes_alloc;
  /* Number of bytes of BUFFER that hold data.  */
  unsigned bytes_used;
  /* Line number of the first line stored here.  */
  unsigned start_line;
  /* Line number of the first line not yet retrieved.  */
  unsigned first_available;
  /* Count of complete lines stored here.  */
  unsigned num_lines;
  /* The data area itself.  */
  char *buffer;
  /* First line table of the chain.  */
  struct line *line_start;
  /* Line table currently being filled or read.  */
  struct line *curr_line;
  /* Next buffer in whichever list this one is linked into.  */
  struct buffer_record *next;
};
123 static void close_output_file PARAMS ((void));
124 static void create_output_file PARAMS ((void));
125 static void delete_all_files PARAMS ((void));
126 static void save_line_to_file PARAMS ((const struct cstring *line));
127 void usage PARAMS ((int status));
129 /* The name this program was run with. */
130 char *program_name;
132 /* Convert the number of 8-bit bytes of a binary representation to
133 the number of characters required to represent the same quantity
134 as an unsigned octal. For example, a 32-bit (4-byte) quantity may
135 require a field width as wide as 11 characters. */
136 static const unsigned int bytes_to_octal_digits[] =
137 {0, 3, 6, 8, 11, 14, 16, 19, 22, 25, 27, 30, 32, 35, 38, 41, 43};
139 /* Input file descriptor. */
140 static int input_desc = 0;
142 /* List of available buffers. */
143 static struct buffer_record *free_list = NULL;
145 /* Start of buffer list. */
146 static struct buffer_record *head = NULL;
148 /* Partially read line. */
149 static char *hold_area = NULL;
151 /* Number of chars in `hold_area'. */
152 static unsigned hold_count = 0;
154 /* Number of the last line in the buffers. */
155 static unsigned last_line_number = 0;
157 /* Number of the line currently being examined. */
158 static unsigned current_line = 0;
160 /* If TRUE, we have read EOF. */
161 static boolean have_read_eof = FALSE;
163 /* Name of output files. */
164 static char *filename_space = NULL;
166 /* Prefix part of output file names. */
167 static char *prefix = NULL;
169 /* Suffix part of output file names. */
170 static char *suffix = NULL;
172 /* Number of digits to use in output file names. */
173 static int digits = 2;
175 /* Number of files created so far. */
176 static unsigned int files_created = 0;
178 /* Number of bytes written to current file. */
179 static unsigned int bytes_written;
181 /* Output file pointer. */
182 static FILE *output_stream = NULL;
184 /* Output file name. */
185 static char *output_filename = NULL;
187 /* Perhaps it would be cleaner to pass arg values instead of indexes. */
188 static char **global_argv;
190 /* If TRUE, do not print the count of bytes in each output file. */
191 static boolean suppress_count;
193 /* If TRUE, remove output files on error. */
194 static boolean remove_files;
196 /* If TRUE, remove all output files which have a zero length. */
197 static boolean elide_empty_files;
199 /* The compiled pattern arguments, which determine how to split
200 the input file. */
201 static struct control *controls;
203 /* Number of elements in `controls'. */
204 static unsigned int control_used;
206 static struct option const longopts[] =
208 {"digits", required_argument, NULL, 'n'},
209 {"quiet", no_argument, NULL, 'q'},
210 {"silent", no_argument, NULL, 's'},
211 {"keep-files", no_argument, NULL, 'k'},
212 {"elide-empty-files", no_argument, NULL, 'z'},
213 {"prefix", required_argument, NULL, 'f'},
214 {"suffix-format", required_argument, NULL, 'b'},
215 {GETOPT_HELP_OPTION_DECL},
216 {GETOPT_VERSION_OPTION_DECL},
217 {NULL, 0, NULL, 0}
220 /* Optionally remove files created so far; then exit.
221 Called when an error detected. */
223 static void
224 cleanup (void)
226 if (output_stream)
227 close_output_file ();
229 if (remove_files)
230 delete_all_files ();
/* Run cleanup, then terminate the program with a failure status.
   Does not return.  */

static void
cleanup_fatal (void)
{
  cleanup ();
  exit (EXIT_FAILURE);
}
240 static RETSIGTYPE
241 interrupt_handler (int sig)
243 #ifdef SA_NOCLDSTOP
244 struct sigaction sigact;
246 sigact.sa_handler = SIG_DFL;
247 sigemptyset (&sigact.sa_mask);
248 sigact.sa_flags = 0;
249 sigaction (sig, &sigact, NULL);
250 #else
251 signal (sig, SIG_DFL);
252 #endif
253 cleanup ();
254 kill (getpid (), sig);
257 /* Keep track of NUM chars of a partial line in buffer START.
258 These chars will be retrieved later when another large buffer is read.
259 It is not necessary to create a new buffer for these chars; instead,
260 we keep a pointer to the existing buffer. This buffer *is* on the
261 free list, and when the next buffer is obtained from this list
262 (even if it is this one), these chars will be placed at the
263 start of the new buffer. */
265 static void
266 save_to_hold_area (char *start, unsigned int num)
268 hold_area = start;
269 hold_count = num;
272 /* Read up to MAX_N_BYTES chars from the input stream into DEST.
273 Return the number of chars read. */
275 static int
276 read_input (char *dest, unsigned int max_n_bytes)
278 int bytes_read;
280 if (max_n_bytes == 0)
281 return 0;
283 bytes_read = safe_read (input_desc, dest, max_n_bytes);
285 if (bytes_read == 0)
286 have_read_eof = TRUE;
288 if (bytes_read < 0)
290 error (0, errno, _("read error"));
291 cleanup_fatal ();
294 return bytes_read;
297 /* Initialize existing line record P. */
299 static void
300 clear_line_control (struct line *p)
302 p->used = 0;
303 p->insert_index = 0;
304 p->retrieve_index = 0;
307 /* Initialize all line records in B. */
309 static void
310 clear_all_line_control (struct buffer_record *b)
312 struct line *l;
314 for (l = b->line_start; l; l = l->next)
315 clear_line_control (l);
318 /* Return a new, initialized line record. */
320 static struct line *
321 new_line_control (void)
323 struct line *p;
325 p = (struct line *) xmalloc (sizeof (struct line));
327 p->next = NULL;
328 clear_line_control (p);
330 return p;
333 /* Record LINE_START, which is the address of the start of a line
334 of length LINE_LEN in the large buffer, in the lines buffer of B. */
336 static void
337 keep_new_line (struct buffer_record *b, char *line_start, int line_len)
339 struct line *l;
341 /* If there is no existing area to keep line info, get some. */
342 if (b->line_start == NULL)
343 b->line_start = b->curr_line = new_line_control ();
345 /* If existing area for lines is full, get more. */
346 if (b->curr_line->used == CTRL_SIZE)
348 b->curr_line->next = new_line_control ();
349 b->curr_line = b->curr_line->next;
352 l = b->curr_line;
354 /* Record the start of the line, and update counters. */
355 l->starts[l->insert_index].str = line_start;
356 l->starts[l->insert_index].len = line_len;
357 l->used++;
358 l->insert_index++;
361 /* Scan the buffer in B for newline characters
362 and record the line start locations and lengths in B.
363 Return the number of lines found in this buffer.
365 There may be an incomplete line at the end of the buffer;
366 a pointer is kept to this area, which will be used when
367 the next buffer is filled. */
369 static unsigned int
370 record_line_starts (struct buffer_record *b)
372 char *line_start; /* Start of current line. */
373 char *line_end; /* End of each line found. */
374 unsigned int bytes_left; /* Length of incomplete last line. */
375 unsigned int lines; /* Number of lines found. */
376 unsigned int line_length; /* Length of each line found. */
378 if (b->bytes_used == 0)
379 return 0;
381 lines = 0;
382 line_start = b->buffer;
383 bytes_left = b->bytes_used;
385 for (;;)
387 line_end = memchr (line_start, '\n', bytes_left);
388 if (line_end == NULL)
389 break;
390 line_length = line_end - line_start + 1;
391 keep_new_line (b, line_start, line_length);
392 bytes_left -= line_length;
393 line_start = line_end + 1;
394 lines++;
397 /* Check for an incomplete last line. */
398 if (bytes_left)
400 if (have_read_eof)
402 keep_new_line (b, line_start, bytes_left);
403 lines++;
405 else
406 save_to_hold_area (line_start, bytes_left);
409 b->num_lines = lines;
410 b->first_available = b->start_line = last_line_number + 1;
411 last_line_number += lines;
413 return lines;
416 /* Return a new buffer with room to store SIZE bytes, plus
417 an extra byte for safety. */
419 static struct buffer_record *
420 create_new_buffer (unsigned int size)
422 struct buffer_record *new_buffer;
424 new_buffer = (struct buffer_record *)
425 xmalloc (sizeof (struct buffer_record));
427 new_buffer->buffer = (char *) xmalloc (size + 1);
429 new_buffer->bytes_alloc = size;
430 new_buffer->line_start = new_buffer->curr_line = NULL;
432 return new_buffer;
435 /* Return a new buffer of at least MINSIZE bytes. If a buffer of at
436 least that size is currently free, use it, otherwise create a new one. */
438 static struct buffer_record *
439 get_new_buffer (unsigned int min_size)
441 struct buffer_record *p, *q;
442 struct buffer_record *new_buffer; /* Buffer to return. */
443 unsigned int alloc_size; /* Actual size that will be requested. */
445 alloc_size = START_SIZE;
446 while (min_size > alloc_size)
447 alloc_size += INCR_SIZE;
449 if (free_list == NULL)
450 new_buffer = create_new_buffer (alloc_size);
451 else
453 /* Use first-fit to find a buffer. */
454 p = new_buffer = NULL;
455 q = free_list;
459 if (q->bytes_alloc >= min_size)
461 if (p == NULL)
462 free_list = q->next;
463 else
464 p->next = q->next;
465 break;
467 p = q;
468 q = q->next;
470 while (q);
472 new_buffer = (q ? q : create_new_buffer (alloc_size));
474 new_buffer->curr_line = new_buffer->line_start;
475 clear_all_line_control (new_buffer);
478 new_buffer->num_lines = 0;
479 new_buffer->bytes_used = 0;
480 new_buffer->start_line = new_buffer->first_available = last_line_number + 1;
481 new_buffer->next = NULL;
483 return new_buffer;
486 /* Add buffer BUF to the list of free buffers. */
488 static void
489 free_buffer (struct buffer_record *buf)
491 buf->next = free_list;
492 free_list = buf;
495 /* Append buffer BUF to the linked list of buffers that contain
496 some data yet to be processed. */
498 static void
499 save_buffer (struct buffer_record *buf)
501 struct buffer_record *p;
503 buf->next = NULL;
504 buf->curr_line = buf->line_start;
506 if (head == NULL)
507 head = buf;
508 else
510 for (p = head; p->next; p = p->next)
511 /* Do nothing. */ ;
512 p->next = buf;
516 /* Fill a buffer of input.
518 Set the initial size of the buffer to a default.
519 Fill the buffer (from the hold area and input stream)
520 and find the individual lines.
521 If no lines are found (the buffer is too small to hold the next line),
522 release the current buffer (whose contents would have been put in the
523 hold area) and repeat the process with another large buffer until at least
524 one entire line has been read.
526 Return TRUE if a new buffer was obtained, otherwise false
527 (in which case end-of-file must have been encountered). */
529 static boolean
530 load_buffer (void)
532 struct buffer_record *b;
533 unsigned int bytes_wanted = START_SIZE; /* Minimum buffer size. */
534 unsigned int bytes_avail; /* Size of new buffer created. */
535 unsigned int lines_found; /* Number of lines in this new buffer. */
536 char *p; /* Place to load into buffer. */
538 if (have_read_eof)
539 return FALSE;
541 /* We must make the buffer at least as large as the amount of data
542 in the partial line left over from the last call. */
543 if (bytes_wanted < hold_count)
544 bytes_wanted = hold_count;
548 b = get_new_buffer (bytes_wanted);
549 bytes_avail = b->bytes_alloc; /* Size of buffer returned. */
550 p = b->buffer;
552 /* First check the `holding' area for a partial line. */
553 if (hold_count)
555 if (p != hold_area)
556 memcpy (p, hold_area, hold_count);
557 p += hold_count;
558 b->bytes_used += hold_count;
559 bytes_avail -= hold_count;
560 hold_count = 0;
563 b->bytes_used += (unsigned int) read_input (p, bytes_avail);
565 lines_found = record_line_starts (b);
566 bytes_wanted = b->bytes_alloc * 2;
567 if (!lines_found)
568 free_buffer (b);
570 while (!lines_found && !have_read_eof);
572 if (lines_found)
573 save_buffer (b);
575 return lines_found != 0;
578 /* Return the line number of the first line that has not yet been retrieved. */
580 static unsigned int
581 get_first_line_in_buffer (void)
583 if (head == NULL && !load_buffer ())
584 error (EXIT_FAILURE, errno, _("input disappeared"));
586 return head->first_available;
589 /* Return a pointer to the logical first line in the buffer and make the
590 next line the logical first line.
591 Return NULL if there is no more input. */
593 static struct cstring *
594 remove_line (void)
596 struct cstring *line; /* Return value. */
597 struct line *l; /* For convenience. */
599 if (head == NULL && !load_buffer ())
600 return NULL;
602 if (current_line < head->first_available)
603 current_line = head->first_available;
605 ++(head->first_available);
607 l = head->curr_line;
609 line = &l->starts[l->retrieve_index];
611 /* Advance index to next line. */
612 if (++l->retrieve_index == l->used)
614 /* Go on to the next line record. */
615 head->curr_line = l->next;
616 if (head->curr_line == NULL || head->curr_line->used == 0)
618 /* Go on to the next data block. */
619 struct buffer_record *b = head;
620 head = head->next;
621 free_buffer (b);
625 return line;
628 /* Search the buffers for line LINENUM, reading more input if necessary.
629 Return a pointer to the line, or NULL if it is not found in the file. */
631 static struct cstring *
632 find_line (unsigned int linenum)
634 struct buffer_record *b;
636 if (head == NULL && !load_buffer ())
637 return NULL;
639 if (linenum < head->start_line)
640 return NULL;
642 for (b = head;;)
644 if (linenum < b->start_line + b->num_lines)
646 /* The line is in this buffer. */
647 struct line *l;
648 unsigned int offset; /* How far into the buffer the line is. */
650 l = b->line_start;
651 offset = linenum - b->start_line;
652 /* Find the control record. */
653 while (offset >= CTRL_SIZE)
655 l = l->next;
656 offset -= CTRL_SIZE;
658 return &l->starts[offset];
660 if (b->next == NULL && !load_buffer ())
661 return NULL;
662 b = b->next; /* Try the next data block. */
666 /* Return TRUE if at least one more line is available for input. */
668 static boolean
669 no_more_lines (void)
671 return (find_line (current_line + 1) == NULL) ? TRUE : FALSE;
674 /* Set the name of the input file to NAME and open it. */
676 static void
677 set_input_file (const char *name)
679 if (STREQ (name, "-"))
680 input_desc = 0;
681 else
683 input_desc = open (name, O_RDONLY);
684 if (input_desc < 0)
685 error (EXIT_FAILURE, errno, "%s", name);
689 /* Write all lines from the beginning of the buffer up to, but
690 not including, line LAST_LINE, to the current output file.
691 If IGNORE is TRUE, do not output lines selected here.
692 ARGNUM is the index in ARGV of the current pattern. */
694 static void
695 write_to_file (unsigned int last_line, boolean ignore, int argnum)
697 struct cstring *line;
698 unsigned int first_line; /* First available input line. */
699 unsigned int lines; /* Number of lines to output. */
700 unsigned int i;
702 first_line = get_first_line_in_buffer ();
704 if (first_line > last_line)
706 error (0, 0, _("%s: line number out of range"), global_argv[argnum]);
707 cleanup_fatal ();
710 lines = last_line - first_line;
712 for (i = 0; i < lines; i++)
714 line = remove_line ();
715 if (line == NULL)
717 error (0, 0, _("%s: line number out of range"), global_argv[argnum]);
718 cleanup_fatal ();
720 if (!ignore)
721 save_line_to_file (line);
/* Write every line that is still unread to the current output file.  */

static void
dump_rest_of_file (void)
{
  for (;;)
    {
      struct cstring *line = remove_line ();

      if (line == NULL)
	break;
      save_line_to_file (line);
    }
}
736 /* Handle an attempt to read beyond EOF under the control of record P,
737 on iteration REPETITION if nonzero. */
739 static void
740 handle_line_error (const struct control *p, int repetition)
742 char buf[LONGEST_HUMAN_READABLE + 1];
744 fprintf (stderr, _("%s: `%s': line number out of range"),
745 program_name, human_readable (p->lines_required, buf, 1, 1));
746 if (repetition)
747 fprintf (stderr, _(" on repetition %d\n"), repetition);
748 else
749 fprintf (stderr, "\n");
751 cleanup_fatal ();
754 /* Determine the line number that marks the end of this file,
755 then get those lines and save them to the output file.
756 P is the control record.
757 REPETITION is the repetition number. */
759 static void
760 process_line_count (const struct control *p, int repetition)
762 unsigned int linenum;
763 uintmax_t last_line_to_save = p->lines_required * (repetition + 1);
764 struct cstring *line;
766 create_output_file ();
768 linenum = get_first_line_in_buffer ();
770 while (linenum++ < last_line_to_save)
772 line = remove_line ();
773 if (line == NULL)
774 handle_line_error (p, repetition);
775 save_line_to_file (line);
778 close_output_file ();
780 /* Ensure that the line number specified is not 1 greater than
781 the number of lines in the file. */
782 if (no_more_lines ())
783 handle_line_error (p, repetition);
786 static void
787 regexp_error (struct control *p, int repetition, boolean ignore)
789 fprintf (stderr, _("%s: `%s': match not found"),
790 program_name, global_argv[p->argnum]);
792 if (repetition)
793 fprintf (stderr, _(" on repetition %d\n"), repetition);
794 else
795 fprintf (stderr, "\n");
797 if (!ignore)
799 dump_rest_of_file ();
800 close_output_file ();
802 cleanup_fatal ();
805 /* Read the input until a line matches the regexp in P, outputting
806 it unless P->IGNORE is TRUE.
807 REPETITION is this repeat-count; 0 means the first time. */
809 static void
810 process_regexp (struct control *p, int repetition)
812 struct cstring *line; /* From input file. */
813 unsigned int line_len; /* To make "$" in regexps work. */
814 unsigned int break_line; /* First line number of next file. */
815 boolean ignore = p->ignore; /* If TRUE, skip this section. */
816 int ret;
818 if (!ignore)
819 create_output_file ();
821 /* If there is no offset for the regular expression, or
822 it is positive, then it is not necessary to buffer the lines. */
824 if (p->offset >= 0)
826 for (;;)
828 line = find_line (++current_line);
829 if (line == NULL)
831 if (p->repeat_forever)
833 if (!ignore)
835 dump_rest_of_file ();
836 close_output_file ();
838 exit (EXIT_SUCCESS);
840 else
841 regexp_error (p, repetition, ignore);
843 line_len = line->len;
844 if (line->str[line_len - 1] == '\n')
845 line_len--;
846 ret = re_search (&p->re_compiled, line->str, line_len,
847 0, line_len, (struct re_registers *) 0);
848 if (ret == -2)
850 error (0, 0, _("error in regular expression search"));
851 cleanup_fatal ();
853 if (ret == -1)
855 line = remove_line ();
856 if (!ignore)
857 save_line_to_file (line);
859 else
860 break;
863 else
865 /* Buffer the lines. */
866 for (;;)
868 line = find_line (++current_line);
869 if (line == NULL)
871 if (p->repeat_forever)
873 if (!ignore)
875 dump_rest_of_file ();
876 close_output_file ();
878 exit (EXIT_SUCCESS);
880 else
881 regexp_error (p, repetition, ignore);
883 line_len = line->len;
884 if (line->str[line_len - 1] == '\n')
885 line_len--;
886 ret = re_search (&p->re_compiled, line->str, line_len,
887 0, line_len, (struct re_registers *) 0);
888 if (ret == -2)
890 error (0, 0, _("error in regular expression search"));
891 cleanup_fatal ();
893 if (ret >= 0)
894 break;
898 /* Account for any offset from this regexp. */
899 break_line = current_line + p->offset;
901 write_to_file (break_line, ignore, p->argnum);
903 if (!ignore)
904 close_output_file ();
906 if (p->offset > 0)
907 current_line = break_line;
910 /* Split the input file according to the control records we have built. */
912 static void
913 split_file (void)
915 unsigned int i, j;
917 for (i = 0; i < control_used; i++)
919 if (controls[i].regexpr)
921 for (j = 0; (controls[i].repeat_forever
922 || j <= controls[i].repeat); j++)
923 process_regexp (&controls[i], j);
925 else
927 for (j = 0; (controls[i].repeat_forever
928 || j <= controls[i].repeat); j++)
929 process_line_count (&controls[i], j);
933 create_output_file ();
934 dump_rest_of_file ();
935 close_output_file ();
938 /* Return the name of output file number NUM. */
940 static char *
941 make_filename (unsigned int num)
943 strcpy (filename_space, prefix);
944 if (suffix)
945 sprintf (filename_space+strlen(prefix), suffix, num);
946 else
947 sprintf (filename_space+strlen(prefix), "%0*d", digits, num);
948 return filename_space;
951 /* Create the next output file. */
953 static void
954 create_output_file (void)
956 output_filename = make_filename (files_created);
957 output_stream = fopen (output_filename, "w");
958 if (output_stream == NULL)
960 error (0, errno, "%s", output_filename);
961 cleanup_fatal ();
963 files_created++;
964 bytes_written = 0;
967 /* Delete all the files we have created. */
969 static void
970 delete_all_files (void)
972 unsigned int i;
973 char *name;
975 for (i = 0; i < files_created; i++)
977 name = make_filename (i);
978 if (unlink (name))
979 error (0, errno, "%s", name);
983 /* Close the current output file and print the count
984 of characters in this file. */
986 static void
987 close_output_file (void)
989 if (output_stream)
991 if (ferror (output_stream) || fclose (output_stream) == EOF)
993 error (0, errno, _("write error for `%s'"), output_filename);
994 output_stream = NULL;
995 cleanup_fatal ();
997 if (bytes_written == 0 && elide_empty_files)
999 if (unlink (output_filename))
1000 error (0, errno, "%s", output_filename);
1001 files_created--;
1003 else
1005 /* FIXME: if we write to stdout here, we have to close stdout
1006 and check for errors. */
1007 if (!suppress_count)
1008 fprintf (stdout, "%d\n", bytes_written);
1010 output_stream = NULL;
1014 /* Save line LINE to the output file and
1015 increment the character count for the current file. */
1017 static void
1018 save_line_to_file (const struct cstring *line)
1020 fwrite (line->str, sizeof (char), line->len, output_stream);
1021 bytes_written += line->len;
1024 /* Return a new, initialized control record. */
1026 static struct control *
1027 new_control_record (void)
1029 static unsigned control_allocated = 0; /* Total space allocated. */
1030 struct control *p;
1032 if (control_allocated == 0)
1034 control_allocated = ALLOC_SIZE;
1035 controls = (struct control *)
1036 xmalloc (sizeof (struct control) * control_allocated);
1038 else if (control_used == control_allocated)
1040 control_allocated += ALLOC_SIZE;
1041 controls = (struct control *)
1042 xrealloc ((char *) controls,
1043 sizeof (struct control) * control_allocated);
1045 p = &controls[control_used++];
1046 p->regexpr = NULL;
1047 p->repeat = 0;
1048 p->repeat_forever = 0;
1049 p->lines_required = 0;
1050 p->offset = 0;
1051 return p;
1054 /* Check if there is a numeric offset after a regular expression.
1055 STR is the entire command line argument.
1056 P is the control record for this regular expression.
1057 NUM is the numeric part of STR. */
1059 static void
1060 check_for_offset (struct control *p, const char *str, const char *num)
1062 unsigned long val;
1064 if (*num != '-' && *num != '+')
1065 error (EXIT_FAILURE, 0, _("%s: `+' or `-' expected after delimeter"), str);
1067 if (xstrtoul (num + 1, NULL, 10, &val, "") != LONGINT_OK
1068 || val > UINT_MAX)
1069 error (EXIT_FAILURE, 0, _("%s: integer expected after `%c'"), str, *num);
1070 p->offset = (unsigned int) val;
1072 if (*num == '-')
1073 p->offset = -p->offset;
1076 /* Given that the first character of command line arg STR is '{',
1077 make sure that the rest of the string is a valid repeat count
1078 and store its value in P.
1079 ARGNUM is the ARGV index of STR. */
1081 static void
1082 parse_repeat_count (int argnum, struct control *p, char *str)
1084 uintmax_t val;
1085 char *end;
1087 end = str + strlen (str) - 1;
1088 if (*end != '}')
1089 error (EXIT_FAILURE, 0, _("%s: `}' is required in repeat count"), str);
1090 *end = '\0';
1092 if (str+1 == end-1 && *(str+1) == '*')
1093 p->repeat_forever = 1;
1094 else
1096 if (xstrtoumax (str + 1, NULL, 10, &val, "") != LONGINT_OK)
1098 error (EXIT_FAILURE, 0,
1099 _("%s}: integer required between `{' and `}'"),
1100 global_argv[argnum]);
1102 p->repeat = val;
1105 *end = '}';
1108 /* Extract the regular expression from STR and check for a numeric offset.
1109 STR should start with the regexp delimiter character.
1110 Return a new control record for the regular expression.
1111 ARGNUM is the ARGV index of STR.
1112 Unless IGNORE is TRUE, mark these lines for output. */
1114 static struct control *
1115 extract_regexp (int argnum, boolean ignore, char *str)
1117 int len; /* Number of chars in this regexp. */
1118 char delim = *str;
1119 char *closing_delim;
1120 struct control *p;
1121 const char *err;
1123 closing_delim = strrchr (str + 1, delim);
1124 if (closing_delim == NULL)
1125 error (EXIT_FAILURE, 0,
1126 _("%s: closing delimeter `%c' missing"), str, delim);
1128 len = closing_delim - str - 1;
1129 p = new_control_record ();
1130 p->argnum = argnum;
1131 p->ignore = ignore;
1133 p->regexpr = (char *) xmalloc ((unsigned) (len + 1));
1134 strncpy (p->regexpr, str + 1, len);
1135 p->re_compiled.allocated = len * 2;
1136 p->re_compiled.buffer = (unsigned char *) xmalloc (p->re_compiled.allocated);
1137 p->re_compiled.fastmap = xmalloc (256);
1138 p->re_compiled.translate = 0;
1139 err = re_compile_pattern (p->regexpr, len, &p->re_compiled);
1140 if (err)
1142 error (0, 0, _("%s: invalid regular expression: %s"), str, err);
1143 cleanup_fatal ();
1146 if (closing_delim[1])
1147 check_for_offset (p, str, closing_delim + 1);
1149 return p;
1152 /* Extract the break patterns from args START through ARGC - 1 of ARGV.
1153 After each pattern, check if the next argument is a repeat count. */
1155 static void
1156 parse_patterns (int argc, int start, char **argv)
1158 int i; /* Index into ARGV. */
1159 struct control *p; /* New control record created. */
1160 uintmax_t val;
1161 static uintmax_t last_val = 0;
1163 for (i = start; i < argc; i++)
1165 if (*argv[i] == '/' || *argv[i] == '%')
1167 p = extract_regexp (i, *argv[i] == '%', argv[i]);
1169 else
1171 p = new_control_record ();
1172 p->argnum = i;
1174 if (xstrtoumax (argv[i], NULL, 10, &val, "") != LONGINT_OK)
1175 error (EXIT_FAILURE, 0, _("%s: invalid pattern"), argv[i]);
1176 if (val == 0)
1177 error (EXIT_FAILURE, 0,
1178 _("%s: line number must be greater than zero"),
1179 argv[i]);
1180 if (val < last_val)
1182 char buf[LONGEST_HUMAN_READABLE + 1];
1183 error (EXIT_FAILURE, 0,
1184 _("line number `%s' is smaller than preceding line number, %s"),
1185 argv[i], human_readable (last_val, buf, 1, 1));
1188 if (val == last_val)
1189 error (0, 0,
1190 _("warning: line number `%s' is the same as preceding line number"),
1191 argv[i]);
1193 last_val = val;
1195 p->lines_required = val;
1198 if (i + 1 < argc && *argv[i + 1] == '{')
1200 /* We have a repeat count. */
1201 i++;
1202 parse_repeat_count (i, p, argv[i]);
/* Step *FORMAT_PTR over the flag characters of a conversion
   specification and return how many extra output characters those
   flags can cause: one for each `+' or space, two for each `#' (room
   for a 0x prefix before an `x' conversion), none for `-'.
   On return *FORMAT_PTR points at the first character that is not one
   of these flags, which may be the terminating NUL.  */

static unsigned
get_format_flags (char **format_ptr)
{
  unsigned extra = 0;
  char *cursor = *format_ptr;

  for (;; cursor++)
    {
      char c = *cursor;

      if (c == '+' || c == ' ')
	extra++;
      else if (c == '#')
	extra += 2;
      else if (c != '-')
	break;
    }

  *format_ptr = cursor;
  return extra;
}
1235 static unsigned
1236 get_format_width (char **format_ptr)
1238 unsigned count = 0;
1239 char *start;
1240 int ch_save;
1242 start = *format_ptr;
1243 for (; ISDIGIT (**format_ptr); (*format_ptr)++)
1244 continue;
1246 ch_save = **format_ptr;
1247 **format_ptr = '\0';
1248 /* In the case where no minimum field width is explicitly specified,
1249 allow for enough octal digits to represent the value of LONG_MAX. */
1250 count = ((*format_ptr == start)
1251 ? bytes_to_octal_digits[sizeof (long)]
1252 : atoi (start));
1253 **format_ptr = ch_save;
1254 return count;
/* Step *FORMAT_PTR over a precision, if there is one, and return it.
   Return 0 when *FORMAT_PTR does not start with `.'.  Return 11 when
   the `.' is followed by no digits, or by a negative number: ANSI
   4.9.6.1 says that a negative precision is as good as not there.  */

static unsigned
get_format_prec (char **format_ptr)
{
  char *first;
  char *past;
  unsigned prec;
  int saved;
  int is_negative = 0;

  if (**format_ptr != '.')
    return 0;
  ++*format_ptr;

  if (**format_ptr == '-')
    {
      is_negative = 1;
      ++*format_ptr;
    }
  else if (**format_ptr == '+')
    ++*format_ptr;

  first = *format_ptr;
  for (past = first; ISDIGIT (*past); past++)
    continue;
  *format_ptr = past;

  if (is_negative || past == first)
    return 11;

  /* Terminate the digit string for the conversion, then undo that.  */
  saved = *past;
  *past = '\0';
  prec = atoi (first);
  *past = saved;

  return prec;
}
/* Consume the conversion specifier character at *FORMAT_PTR and verify
   that it is one of the integer conversions `d', `i', `o', `u', `x'
   or `X'.  Exit with a diagnostic if the specifier is missing (end of
   string) or is any other character.  */

static void
get_format_conv_type (char **format_ptr)
{
  /* Read the byte as unsigned char.  With a signed plain `char' a byte
     above 0x7f would otherwise become a negative int, which is outside
     the domain of the ctype functions and would be printed by %.3o as a
     huge sign-extended octal number.  */
  unsigned char ch = *(*format_ptr)++;

  switch (ch)
    {
    case 'd':
    case 'i':
    case 'o':
    case 'u':
    case 'x':
    case 'X':
      break;

    case 0:
      error (EXIT_FAILURE, 0, _("missing conversion specifier in suffix"));
      break;

    default:
      if (ISPRINT (ch))
	error (EXIT_FAILURE, 0,
	       _("invalid conversion specifier in suffix: %c"), ch);
      else
	error (EXIT_FAILURE, 0,
	       _("invalid conversion specifier in suffix: \\%.3o"),
	       (unsigned int) ch);
    }
}
/* Return an upper bound on the number of bytes that FORMAT, a
   printf-style suffix template, can produce.  Every ordinary character
   counts as one byte.  A `%' conversion counts as its flag allowance
   plus the larger of its field width and precision.
   Exit with a diagnostic unless FORMAT contains exactly one `%'
   conversion specification.  */

static unsigned
max_out (char *format)
{
  unsigned total = 0;
  unsigned n_conversions = 0;

  while (*format)
    {
      int width;
      int prec;

      if (*format++ != '%')
	{
	  /* Literal character: copied to the output unchanged.  */
	  total++;
	  continue;
	}

      n_conversions++;

      /* Each helper advances FORMAT past the part it parses, so they
	 must be called in this order.  */
      total += get_format_flags (&format);
      width = get_format_width (&format);
      prec = get_format_prec (&format);
      total += MAX (width, prec);

      get_format_conv_type (&format);
    }

  if (n_conversions == 0)
    error (EXIT_FAILURE, 0,
	   _("missing %% conversion specification in suffix"));
  else if (n_conversions > 1)
    error (EXIT_FAILURE, 0,
	   _("too many %% conversion specifications in suffix"));

  return total;
}
1362 main (int argc, char **argv)
1364 int optc;
1365 unsigned long val;
1366 #ifdef SA_NOCLDSTOP
1367 struct sigaction oldact, newact;
1368 #endif
1370 program_name = argv[0];
1371 setlocale (LC_ALL, "");
1372 bindtextdomain (PACKAGE, LOCALEDIR);
1373 textdomain (PACKAGE);
1375 atexit (close_stdout);
1377 global_argv = argv;
1378 controls = NULL;
1379 control_used = 0;
1380 suppress_count = FALSE;
1381 remove_files = TRUE;
1382 prefix = DEFAULT_PREFIX;
1384 /* Change the way xmalloc and xrealloc fail. */
1385 xalloc_fail_func = cleanup;
1387 #ifdef SA_NOCLDSTOP
1388 newact.sa_handler = interrupt_handler;
1389 sigemptyset (&newact.sa_mask);
1390 newact.sa_flags = 0;
1392 sigaction (SIGHUP, NULL, &oldact);
1393 if (oldact.sa_handler != SIG_IGN)
1394 sigaction (SIGHUP, &newact, NULL);
1396 sigaction (SIGINT, NULL, &oldact);
1397 if (oldact.sa_handler != SIG_IGN)
1398 sigaction (SIGINT, &newact, NULL);
1400 sigaction (SIGQUIT, NULL, &oldact);
1401 if (oldact.sa_handler != SIG_IGN)
1402 sigaction (SIGQUIT, &newact, NULL);
1404 sigaction (SIGTERM, NULL, &oldact);
1405 if (oldact.sa_handler != SIG_IGN)
1406 sigaction (SIGTERM, &newact, NULL);
1407 #else
1408 if (signal (SIGHUP, SIG_IGN) != SIG_IGN)
1409 signal (SIGHUP, interrupt_handler);
1410 if (signal (SIGINT, SIG_IGN) != SIG_IGN)
1411 signal (SIGINT, interrupt_handler);
1412 if (signal (SIGQUIT, SIG_IGN) != SIG_IGN)
1413 signal (SIGQUIT, interrupt_handler);
1414 if (signal (SIGTERM, SIG_IGN) != SIG_IGN)
1415 signal (SIGTERM, interrupt_handler);
1416 #endif
1418 while ((optc = getopt_long (argc, argv, "f:b:kn:sqz", longopts, NULL)) != -1)
1419 switch (optc)
1421 case 0:
1422 break;
1424 case 'f':
1425 prefix = optarg;
1426 break;
1428 case 'b':
1429 suffix = optarg;
1430 break;
1432 case 'k':
1433 remove_files = FALSE;
1434 break;
1436 case 'n':
1437 if (xstrtoul (optarg, NULL, 10, &val, "") != LONGINT_OK
1438 || val > INT_MAX)
1439 error (EXIT_FAILURE, 0, _("%s: invalid number"), optarg);
1440 digits = (int) val;
1441 break;
1443 case 's':
1444 case 'q':
1445 suppress_count = TRUE;
1446 break;
1448 case 'z':
1449 elide_empty_files = TRUE;
1450 break;
1452 case_GETOPT_HELP_CHAR;
1454 case_GETOPT_VERSION_CHAR (PROGRAM_NAME, AUTHORS);
1456 default:
1457 usage (1);
1460 if (argc - optind < 2)
1462 error (0, 0, _("too few arguments"));
1463 usage (1);
1466 if (suffix)
1467 filename_space = (char *) xmalloc (strlen (prefix) + max_out (suffix) + 2);
1468 else
1469 filename_space = (char *) xmalloc (strlen (prefix) + digits + 2);
1471 set_input_file (argv[optind++]);
1473 parse_patterns (argc, optind, argv);
1475 split_file ();
1477 if (close (input_desc) < 0)
1479 error (0, errno, _("read error"));
1480 cleanup_fatal ();
1483 exit (EXIT_SUCCESS);
/* Print usage information and exit.
   If STATUS is nonzero, write a one-line hint to stderr that points
   the user at `--help'.  Otherwise write the full help text, followed
   by the bug-report address, to stdout.
   Exit with EXIT_SUCCESS when STATUS is 0 and EXIT_FAILURE otherwise;
   this function does not return.  */
1486 void
1487 usage (int status)
1489 if (status != 0)
1490 fprintf (stderr, _("Try `%s --help' for more information.\n"),
1491 program_name);
1492 else
1494 printf (_("\
1495 Usage: %s [OPTION]... FILE PATTERN...\n\
1497 program_name);
1498 fputs (_("\
1499 Output pieces of FILE separated by PATTERN(s) to files `xx01', `xx02', ...,\n\
1500 and output byte counts of each piece to standard output.\n\
1502 "), stdout);
1503 fputs (_("\
1504 Mandatory arguments to long options are mandatory for short options too.\n\
1505 "), stdout);
1506 fputs (_("\
1507 -b, --suffix-format=FORMAT use sprintf FORMAT instead of %d\n\
1508 -f, --prefix=PREFIX use PREFIX instead of `xx'\n\
1509 -k, --keep-files do not remove output files on errors\n\
1510 "), stdout);
1511 fputs (_("\
1512 -n, --digits=DIGITS use specified number of digits instead of 2\n\
1513 -s, --quiet, --silent do not print counts of output file sizes\n\
1514 -z, --elide-empty-files remove empty output files\n\
1515 "), stdout);
1516 fputs (HELP_OPTION_DESCRIPTION, stdout);
1517 fputs (VERSION_OPTION_DESCRIPTION, stdout);
1518 fputs (_("\
1520 Read standard input if FILE is -. Each PATTERN may be:\n\
1521 "), stdout);
1522 fputs (_("\
1524 INTEGER copy up to but not including specified line number\n\
1525 /REGEXP/[OFFSET] copy up to but not including a matching line\n\
1526 %REGEXP%[OFFSET] skip to, but not including a matching line\n\
1527 {INTEGER} repeat the previous pattern specified number of times\n\
1528 {*} repeat the previous pattern as many times as possible\n\
1530 A line OFFSET is a required `+' or `-' followed by a positive integer.\n\
1531 "), stdout);
1532 printf (_("\nReport bugs to <%s>.\n"), PACKAGE_BUGREPORT);
1534 exit (status == 0 ? EXIT_SUCCESS : EXIT_FAILURE);