(open_maybe_create): New function.
[coreutils.git] / src / csplit.c
blobc52d90f20d16db6dd9688e2701e620c39749e379
1 /* csplit - split a file into sections determined by context lines
2 Copyright (C) 91, 1995-1999 Free Software Foundation, Inc.
4 This program is free software; you can redistribute it and/or modify
5 it under the terms of the GNU General Public License as published by
6 the Free Software Foundation; either version 2, or (at your option)
7 any later version.
9 This program is distributed in the hope that it will be useful,
10 but WITHOUT ANY WARRANTY; without even the implied warranty of
11 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 GNU General Public License for more details.
14 You should have received a copy of the GNU General Public License
15 along with this program; if not, write to the Free Software Foundation,
16 Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. */
18 /* Written by Stuart Kemp, cpsrk@groper.jcu.edu.au.
19 Modified by David MacKenzie, djm@gnu.ai.mit.edu. */
21 #include <config.h>
23 #include <stdio.h>
24 #include <assert.h>
25 #include <getopt.h>
26 #include <sys/types.h>
27 #include <signal.h>
29 #include "system.h"
31 #include <regex.h>
33 #include "error.h"
34 #include "long-options.h"
35 #include "safe-read.h"
36 #include "xstrtoul.h"
37 #include "xalloc.h"
39 #ifdef STDC_HEADERS
40 # include <stdlib.h>
41 #endif
/* Larger of A and B.  Each argument may be evaluated more than once,
   so do not pass expressions with side effects.  */
#if !defined MAX
# define MAX(a,b) (((a) > (b)) ? (a) : (b))
#endif

/* Truth values for the `boolean' type below, unless already provided.  */
#if !defined TRUE
# define FALSE 0
# define TRUE 1
#endif

/* Number of control records added each time the `controls' array grows.  */
#define ALLOC_SIZE 20

/* Output file name prefix used when none is given on the command line.  */
#define DEFAULT_PREFIX "xx"

/* Holds TRUE or FALSE.  */
typedef int boolean;
60 /* A compiled pattern arg. */
61 struct control
63 char *regexpr; /* Non-compiled regular expression. */
64 struct re_pattern_buffer re_compiled; /* Compiled regular expression. */
65 int offset; /* Offset from regexp to split at. */
66 int lines_required; /* Number of lines required. */
67 unsigned int repeat; /* Repeat count. */
68 int repeat_forever; /* Non-zero if `*' used as a repeat count. */
69 int argnum; /* ARGV index. */
70 boolean ignore; /* If true, produce no output (for regexp). */
/* Sizing parameters for the input buffers.

   START_SIZE  Initial size of data area in buffers.
   INCR_SIZE   Increment size for data area.
   CTRL_SIZE   Number of lines kept in each node in line list.

   The DEBUG values are deliberately tiny so that the buffer-growing and
   record-chaining paths are exercised by small inputs.  The two sets are
   mutually exclusive: defining the production values first and then
   redefining them under DEBUG would be an invalid macro redefinition.  */
#ifdef DEBUG
/* Some small values to test the algorithms. */
# define START_SIZE 200
# define INCR_SIZE 10
# define CTRL_SIZE 1
#else
# define START_SIZE 8191
# define INCR_SIZE 2048
# define CTRL_SIZE 80
#endif
/* A counted string: STR points at LEN bytes of data.  */
struct cstring
{
  int len;                      /* Number of bytes at STR.  */
  char *str;                    /* First byte of the string.  */
};
96 /* Pointers to the beginnings of lines in the buffer area.
97 These structures are linked together if needed. */
98 struct line
100 unsigned used; /* Number of offsets used in this struct. */
101 unsigned insert_index; /* Next offset to use when inserting line. */
102 unsigned retrieve_index; /* Next index to use when retrieving line. */
103 struct cstring starts[CTRL_SIZE]; /* Lines in the data area. */
104 struct line *next; /* Next in linked list. */
/* One block of input.  It owns a data area plus a chain of `struct line'
   nodes that locate the individual lines inside that area.  */
struct buffer_record
{
  /* Capacity of BUFFER in bytes.  */
  unsigned bytes_alloc;

  /* Bytes of BUFFER currently holding data.  */
  unsigned bytes_used;

  /* Number of the first line stored in this buffer.  */
  unsigned start_line;

  /* Number of the first line that has not been retrieved yet.  */
  unsigned first_available;

  /* Count of complete lines stored in this buffer.  */
  unsigned num_lines;

  /* The data area itself.  */
  char *buffer;

  /* First node of the chain of line locations.  */
  struct line *line_start;

  /* Node of the chain currently being filled or read.  */
  struct line *curr_line;

  /* Next buffer in whichever list this one is on.  */
  struct buffer_record *next;
};
123 static void close_output_file PARAMS ((void));
124 static void create_output_file PARAMS ((void));
125 static void delete_all_files PARAMS ((void));
126 static void save_line_to_file PARAMS ((const struct cstring *line));
127 void usage PARAMS ((int status));
/* Name under which this program was invoked.  */
char *program_name;

/* Indexed by a size in 8-bit bytes, gives the number of characters
   needed to print any unsigned value of that size in octal.  A 4-byte
   (32-bit) quantity, for instance, can need 11 characters.  */
static const unsigned int bytes_to_octal_digits[] =
{
  0, 3, 6, 8, 11, 14, 16, 19, 22, 25, 27, 30, 32, 35, 38, 41, 43
};
139 /* Input file descriptor. */
140 static int input_desc = 0;
142 /* List of available buffers. */
143 static struct buffer_record *free_list = NULL;
145 /* Start of buffer list. */
146 static struct buffer_record *head = NULL;
148 /* Partially read line. */
149 static char *hold_area = NULL;
151 /* Number of chars in `hold_area'. */
152 static unsigned hold_count = 0;
154 /* Number of the last line in the buffers. */
155 static unsigned last_line_number = 0;
157 /* Number of the line currently being examined. */
158 static unsigned current_line = 0;
160 /* If TRUE, we have read EOF. */
161 static boolean have_read_eof = FALSE;
163 /* Name of output files. */
164 static char *filename_space = NULL;
166 /* Prefix part of output file names. */
167 static char *prefix = NULL;
169 /* Suffix part of output file names. */
170 static char *suffix = NULL;
172 /* Number of digits to use in output file names. */
173 static int digits = 2;
175 /* Number of files created so far. */
176 static unsigned int files_created = 0;
178 /* Number of bytes written to current file. */
179 static unsigned int bytes_written;
181 /* Output file pointer. */
182 static FILE *output_stream = NULL;
184 /* Output file name. */
185 static char *output_filename = NULL;
187 /* Perhaps it would be cleaner to pass arg values instead of indexes. */
188 static char **global_argv;
190 /* If TRUE, do not print the count of bytes in each output file. */
191 static boolean suppress_count;
193 /* If TRUE, remove output files on error. */
194 static boolean remove_files;
196 /* If TRUE, remove all output files which have a zero length. */
197 static boolean elide_empty_files;
199 /* The compiled pattern arguments, which determine how to split
200 the input file. */
201 static struct control *controls;
203 /* Number of elements in `controls'. */
204 static unsigned int control_used;
/* Long options, each mapped to a single-letter option code.  The table
   ends with an all-zero entry.  */
static struct option const longopts[] =
{
  {"digits",            required_argument, NULL, 'n'},
  {"quiet",             no_argument,       NULL, 'q'},
  {"silent",            no_argument,       NULL, 's'},
  {"keep-files",        no_argument,       NULL, 'k'},
  {"elide-empty-files", no_argument,       NULL, 'z'},
  {"prefix",            required_argument, NULL, 'f'},
  {"suffix-format",     required_argument, NULL, 'b'},
  {NULL, 0, NULL, 0}
};
218 /* Optionally remove files created so far; then exit.
219 Called when an error detected. */
221 static void
222 cleanup (void)
224 if (output_stream)
225 close_output_file ();
227 if (remove_files)
228 delete_all_files ();
/* Run cleanup, then terminate the program with a failure status.
   Does not return.  */

static void
cleanup_fatal (void)
{
  cleanup ();
  exit (EXIT_FAILURE);
}
238 static RETSIGTYPE
239 interrupt_handler (int sig)
241 #ifdef SA_INTERRUPT
242 struct sigaction sigact;
244 sigact.sa_handler = SIG_DFL;
245 sigemptyset (&sigact.sa_mask);
246 sigact.sa_flags = 0;
247 sigaction (sig, &sigact, NULL);
248 #else /* !SA_INTERRUPT */
249 signal (sig, SIG_DFL);
250 #endif /* SA_INTERRUPT */
251 cleanup ();
252 kill (getpid (), sig);
255 /* Keep track of NUM chars of a partial line in buffer START.
256 These chars will be retrieved later when another large buffer is read.
257 It is not necessary to create a new buffer for these chars; instead,
258 we keep a pointer to the existing buffer. This buffer *is* on the
259 free list, and when the next buffer is obtained from this list
260 (even if it is this one), these chars will be placed at the
261 start of the new buffer. */
263 static void
264 save_to_hold_area (char *start, unsigned int num)
266 hold_area = start;
267 hold_count = num;
270 /* Read up to MAX_N_BYTES chars from the input stream into DEST.
271 Return the number of chars read. */
273 static int
274 read_input (char *dest, unsigned int max_n_bytes)
276 int bytes_read;
278 if (max_n_bytes == 0)
279 return 0;
281 bytes_read = safe_read (input_desc, dest, max_n_bytes);
283 if (bytes_read == 0)
284 have_read_eof = TRUE;
286 if (bytes_read < 0)
288 error (0, errno, _("read error"));
289 cleanup_fatal ();
292 return bytes_read;
295 /* Initialize existing line record P. */
297 static void
298 clear_line_control (struct line *p)
300 p->used = 0;
301 p->insert_index = 0;
302 p->retrieve_index = 0;
305 /* Initialize all line records in B. */
307 static void
308 clear_all_line_control (struct buffer_record *b)
310 struct line *l;
312 for (l = b->line_start; l; l = l->next)
313 clear_line_control (l);
316 /* Return a new, initialized line record. */
318 static struct line *
319 new_line_control (void)
321 struct line *p;
323 p = (struct line *) xmalloc (sizeof (struct line));
325 p->next = NULL;
326 clear_line_control (p);
328 return p;
331 /* Record LINE_START, which is the address of the start of a line
332 of length LINE_LEN in the large buffer, in the lines buffer of B. */
334 static void
335 keep_new_line (struct buffer_record *b, char *line_start, int line_len)
337 struct line *l;
339 /* If there is no existing area to keep line info, get some. */
340 if (b->line_start == NULL)
341 b->line_start = b->curr_line = new_line_control ();
343 /* If existing area for lines is full, get more. */
344 if (b->curr_line->used == CTRL_SIZE)
346 b->curr_line->next = new_line_control ();
347 b->curr_line = b->curr_line->next;
350 l = b->curr_line;
352 /* Record the start of the line, and update counters. */
353 l->starts[l->insert_index].str = line_start;
354 l->starts[l->insert_index].len = line_len;
355 l->used++;
356 l->insert_index++;
359 /* Scan the buffer in B for newline characters
360 and record the line start locations and lengths in B.
361 Return the number of lines found in this buffer.
363 There may be an incomplete line at the end of the buffer;
364 a pointer is kept to this area, which will be used when
365 the next buffer is filled. */
367 static unsigned int
368 record_line_starts (struct buffer_record *b)
370 char *line_start; /* Start of current line. */
371 char *line_end; /* End of each line found. */
372 unsigned int bytes_left; /* Length of incomplete last line. */
373 unsigned int lines; /* Number of lines found. */
374 unsigned int line_length; /* Length of each line found. */
376 if (b->bytes_used == 0)
377 return 0;
379 lines = 0;
380 line_start = b->buffer;
381 bytes_left = b->bytes_used;
383 for (;;)
385 line_end = memchr (line_start, '\n', bytes_left);
386 if (line_end == NULL)
387 break;
388 line_length = line_end - line_start + 1;
389 keep_new_line (b, line_start, line_length);
390 bytes_left -= line_length;
391 line_start = line_end + 1;
392 lines++;
395 /* Check for an incomplete last line. */
396 if (bytes_left)
398 if (have_read_eof)
400 keep_new_line (b, line_start, bytes_left);
401 lines++;
403 else
404 save_to_hold_area (line_start, bytes_left);
407 b->num_lines = lines;
408 b->first_available = b->start_line = last_line_number + 1;
409 last_line_number += lines;
411 return lines;
414 /* Return a new buffer with room to store SIZE bytes, plus
415 an extra byte for safety. */
417 static struct buffer_record *
418 create_new_buffer (unsigned int size)
420 struct buffer_record *new_buffer;
422 new_buffer = (struct buffer_record *)
423 xmalloc (sizeof (struct buffer_record));
425 new_buffer->buffer = (char *) xmalloc (size + 1);
427 new_buffer->bytes_alloc = size;
428 new_buffer->line_start = new_buffer->curr_line = NULL;
430 return new_buffer;
433 /* Return a new buffer of at least MINSIZE bytes. If a buffer of at
434 least that size is currently free, use it, otherwise create a new one. */
436 static struct buffer_record *
437 get_new_buffer (unsigned int min_size)
439 struct buffer_record *p, *q;
440 struct buffer_record *new_buffer; /* Buffer to return. */
441 unsigned int alloc_size; /* Actual size that will be requested. */
443 alloc_size = START_SIZE;
444 while (min_size > alloc_size)
445 alloc_size += INCR_SIZE;
447 if (free_list == NULL)
448 new_buffer = create_new_buffer (alloc_size);
449 else
451 /* Use first-fit to find a buffer. */
452 p = new_buffer = NULL;
453 q = free_list;
457 if (q->bytes_alloc >= min_size)
459 if (p == NULL)
460 free_list = q->next;
461 else
462 p->next = q->next;
463 break;
465 p = q;
466 q = q->next;
468 while (q);
470 new_buffer = (q ? q : create_new_buffer (alloc_size));
472 new_buffer->curr_line = new_buffer->line_start;
473 clear_all_line_control (new_buffer);
476 new_buffer->num_lines = 0;
477 new_buffer->bytes_used = 0;
478 new_buffer->start_line = new_buffer->first_available = last_line_number + 1;
479 new_buffer->next = NULL;
481 return new_buffer;
484 /* Add buffer BUF to the list of free buffers. */
486 static void
487 free_buffer (struct buffer_record *buf)
489 buf->next = free_list;
490 free_list = buf;
493 /* Append buffer BUF to the linked list of buffers that contain
494 some data yet to be processed. */
496 static void
497 save_buffer (struct buffer_record *buf)
499 struct buffer_record *p;
501 buf->next = NULL;
502 buf->curr_line = buf->line_start;
504 if (head == NULL)
505 head = buf;
506 else
508 for (p = head; p->next; p = p->next)
509 /* Do nothing. */ ;
510 p->next = buf;
514 /* Fill a buffer of input.
516 Set the initial size of the buffer to a default.
517 Fill the buffer (from the hold area and input stream)
518 and find the individual lines.
519 If no lines are found (the buffer is too small to hold the next line),
520 release the current buffer (whose contents would have been put in the
521 hold area) and repeat the process with another large buffer until at least
522 one entire line has been read.
524 Return TRUE if a new buffer was obtained, otherwise false
525 (in which case end-of-file must have been encountered). */
527 static boolean
528 load_buffer (void)
530 struct buffer_record *b;
531 unsigned int bytes_wanted = START_SIZE; /* Minimum buffer size. */
532 unsigned int bytes_avail; /* Size of new buffer created. */
533 unsigned int lines_found; /* Number of lines in this new buffer. */
534 char *p; /* Place to load into buffer. */
536 if (have_read_eof)
537 return FALSE;
539 /* We must make the buffer at least as large as the amount of data
540 in the partial line left over from the last call. */
541 if (bytes_wanted < hold_count)
542 bytes_wanted = hold_count;
546 b = get_new_buffer (bytes_wanted);
547 bytes_avail = b->bytes_alloc; /* Size of buffer returned. */
548 p = b->buffer;
550 /* First check the `holding' area for a partial line. */
551 if (hold_count)
553 if (p != hold_area)
554 memcpy (p, hold_area, hold_count);
555 p += hold_count;
556 b->bytes_used += hold_count;
557 bytes_avail -= hold_count;
558 hold_count = 0;
561 b->bytes_used += (unsigned int) read_input (p, bytes_avail);
563 lines_found = record_line_starts (b);
564 bytes_wanted = b->bytes_alloc * 2;
565 if (!lines_found)
566 free_buffer (b);
568 while (!lines_found && !have_read_eof);
570 if (lines_found)
571 save_buffer (b);
573 return lines_found != 0;
576 /* Return the line number of the first line that has not yet been retrieved. */
578 static unsigned int
579 get_first_line_in_buffer (void)
581 if (head == NULL && !load_buffer ())
582 error (EXIT_FAILURE, errno, _("input disappeared"));
584 return head->first_available;
587 /* Return a pointer to the logical first line in the buffer and make the
588 next line the logical first line.
589 Return NULL if there is no more input. */
591 static struct cstring *
592 remove_line (void)
594 struct cstring *line; /* Return value. */
595 struct line *l; /* For convenience. */
597 if (head == NULL && !load_buffer ())
598 return NULL;
600 if (current_line < head->first_available)
601 current_line = head->first_available;
603 ++(head->first_available);
605 l = head->curr_line;
607 line = &l->starts[l->retrieve_index];
609 /* Advance index to next line. */
610 if (++l->retrieve_index == l->used)
612 /* Go on to the next line record. */
613 head->curr_line = l->next;
614 if (head->curr_line == NULL || head->curr_line->used == 0)
616 /* Go on to the next data block. */
617 struct buffer_record *b = head;
618 head = head->next;
619 free_buffer (b);
623 return line;
626 /* Search the buffers for line LINENUM, reading more input if necessary.
627 Return a pointer to the line, or NULL if it is not found in the file. */
629 static struct cstring *
630 find_line (unsigned int linenum)
632 struct buffer_record *b;
634 if (head == NULL && !load_buffer ())
635 return NULL;
637 if (linenum < head->start_line)
638 return NULL;
640 for (b = head;;)
642 if (linenum < b->start_line + b->num_lines)
644 /* The line is in this buffer. */
645 struct line *l;
646 unsigned int offset; /* How far into the buffer the line is. */
648 l = b->line_start;
649 offset = linenum - b->start_line;
650 /* Find the control record. */
651 while (offset >= CTRL_SIZE)
653 l = l->next;
654 offset -= CTRL_SIZE;
656 return &l->starts[offset];
658 if (b->next == NULL && !load_buffer ())
659 return NULL;
660 b = b->next; /* Try the next data block. */
664 /* Return TRUE if at least one more line is available for input. */
666 static boolean
667 no_more_lines (void)
669 return (find_line (current_line + 1) == NULL) ? TRUE : FALSE;
672 /* Set the name of the input file to NAME and open it. */
674 static void
675 set_input_file (const char *name)
677 if (STREQ (name, "-"))
678 input_desc = 0;
679 else
681 input_desc = open (name, O_RDONLY);
682 if (input_desc < 0)
683 error (EXIT_FAILURE, errno, "%s", name);
687 /* Write all lines from the beginning of the buffer up to, but
688 not including, line LAST_LINE, to the current output file.
689 If IGNORE is TRUE, do not output lines selected here.
690 ARGNUM is the index in ARGV of the current pattern. */
692 static void
693 write_to_file (unsigned int last_line, boolean ignore, int argnum)
695 struct cstring *line;
696 unsigned int first_line; /* First available input line. */
697 unsigned int lines; /* Number of lines to output. */
698 unsigned int i;
700 first_line = get_first_line_in_buffer ();
702 if (first_line > last_line)
704 error (0, 0, _("%s: line number out of range"), global_argv[argnum]);
705 cleanup_fatal ();
708 lines = last_line - first_line;
710 for (i = 0; i < lines; i++)
712 line = remove_line ();
713 if (line == NULL)
715 error (0, 0, _("%s: line number out of range"), global_argv[argnum]);
716 cleanup_fatal ();
718 if (!ignore)
719 save_line_to_file (line);
/* Write every remaining input line to the current output file.  */

static void
dump_rest_of_file (void)
{
  for (;;)
    {
      struct cstring *line = remove_line ();

      if (line == NULL)
        break;
      save_line_to_file (line);
    }
}
734 /* Handle an attempt to read beyond EOF under the control of record P,
735 on iteration REPETITION if nonzero. */
737 static void
738 handle_line_error (const struct control *p, int repetition)
740 fprintf (stderr, _("%s: `%d': line number out of range"),
741 program_name, p->lines_required);
742 if (repetition)
743 fprintf (stderr, _(" on repetition %d\n"), repetition);
744 else
745 fprintf (stderr, "\n");
747 cleanup_fatal ();
750 /* Determine the line number that marks the end of this file,
751 then get those lines and save them to the output file.
752 P is the control record.
753 REPETITION is the repetition number. */
755 static void
756 process_line_count (const struct control *p, int repetition)
758 unsigned int linenum;
759 unsigned int last_line_to_save = p->lines_required * (repetition + 1);
760 struct cstring *line;
762 create_output_file ();
764 linenum = get_first_line_in_buffer ();
766 /* Initially, I wanted to assert linenum < last_line_to_save, but that
767 condition is false for the valid command: echo | csplit - 1 '{*}'.
768 So, relax it just a little. */
769 assert ((linenum == 1 && last_line_to_save == 1)
770 || linenum < last_line_to_save);
772 while (linenum++ < last_line_to_save)
774 line = remove_line ();
775 if (line == NULL)
776 handle_line_error (p, repetition);
777 save_line_to_file (line);
780 close_output_file ();
782 /* Ensure that the line number specified is not 1 greater than
783 the number of lines in the file. */
784 if (no_more_lines ())
785 handle_line_error (p, repetition);
788 static void
789 regexp_error (struct control *p, int repetition, boolean ignore)
791 fprintf (stderr, _("%s: `%s': match not found"),
792 program_name, global_argv[p->argnum]);
794 if (repetition)
795 fprintf (stderr, _(" on repetition %d\n"), repetition);
796 else
797 fprintf (stderr, "\n");
799 if (!ignore)
801 dump_rest_of_file ();
802 close_output_file ();
804 cleanup_fatal ();
807 /* Read the input until a line matches the regexp in P, outputting
808 it unless P->IGNORE is TRUE.
809 REPETITION is this repeat-count; 0 means the first time. */
811 static void
812 process_regexp (struct control *p, int repetition)
814 struct cstring *line; /* From input file. */
815 unsigned int line_len; /* To make "$" in regexps work. */
816 unsigned int break_line; /* First line number of next file. */
817 boolean ignore = p->ignore; /* If TRUE, skip this section. */
818 int ret;
820 if (!ignore)
821 create_output_file ();
823 /* If there is no offset for the regular expression, or
824 it is positive, then it is not necessary to buffer the lines. */
826 if (p->offset >= 0)
828 for (;;)
830 line = find_line (++current_line);
831 if (line == NULL)
833 if (p->repeat_forever)
835 if (!ignore)
837 dump_rest_of_file ();
838 close_output_file ();
840 exit (EXIT_SUCCESS);
842 else
843 regexp_error (p, repetition, ignore);
845 line_len = line->len;
846 if (line->str[line_len - 1] == '\n')
847 line_len--;
848 ret = re_search (&p->re_compiled, line->str, line_len,
849 0, line_len, (struct re_registers *) 0);
850 if (ret == -2)
852 error (0, 0, _("error in regular expression search"));
853 cleanup_fatal ();
855 if (ret == -1)
857 line = remove_line ();
858 if (!ignore)
859 save_line_to_file (line);
861 else
862 break;
865 else
867 /* Buffer the lines. */
868 for (;;)
870 line = find_line (++current_line);
871 if (line == NULL)
873 if (p->repeat_forever)
875 if (!ignore)
877 dump_rest_of_file ();
878 close_output_file ();
880 exit (EXIT_SUCCESS);
882 else
883 regexp_error (p, repetition, ignore);
885 line_len = line->len;
886 if (line->str[line_len - 1] == '\n')
887 line_len--;
888 ret = re_search (&p->re_compiled, line->str, line_len,
889 0, line_len, (struct re_registers *) 0);
890 if (ret == -2)
892 error (0, 0, _("error in regular expression search"));
893 cleanup_fatal ();
895 if (ret >= 0)
896 break;
900 /* Account for any offset from this regexp. */
901 break_line = current_line + p->offset;
903 write_to_file (break_line, ignore, p->argnum);
905 if (!ignore)
906 close_output_file ();
908 if (p->offset > 0)
909 current_line = break_line;
912 /* Split the input file according to the control records we have built. */
914 static void
915 split_file (void)
917 unsigned int i, j;
919 for (i = 0; i < control_used; i++)
921 if (controls[i].regexpr)
923 for (j = 0; (controls[i].repeat_forever
924 || j <= controls[i].repeat); j++)
925 process_regexp (&controls[i], j);
927 else
929 for (j = 0; (controls[i].repeat_forever
930 || j <= controls[i].repeat); j++)
931 process_line_count (&controls[i], j);
935 create_output_file ();
936 dump_rest_of_file ();
937 close_output_file ();
940 /* Return the name of output file number NUM. */
942 static char *
943 make_filename (unsigned int num)
945 strcpy (filename_space, prefix);
946 if (suffix)
947 sprintf (filename_space+strlen(prefix), suffix, num);
948 else
949 sprintf (filename_space+strlen(prefix), "%0*d", digits, num);
950 return filename_space;
953 /* Create the next output file. */
955 static void
956 create_output_file (void)
958 output_filename = make_filename (files_created);
959 output_stream = fopen (output_filename, "w");
960 if (output_stream == NULL)
962 error (0, errno, "%s", output_filename);
963 cleanup_fatal ();
965 files_created++;
966 bytes_written = 0;
969 /* Delete all the files we have created. */
971 static void
972 delete_all_files (void)
974 unsigned int i;
975 char *name;
977 for (i = 0; i < files_created; i++)
979 name = make_filename (i);
980 if (unlink (name))
981 error (0, errno, "%s", name);
985 /* Close the current output file and print the count
986 of characters in this file. */
988 static void
989 close_output_file (void)
991 if (output_stream)
993 if (ferror (output_stream) || fclose (output_stream) == EOF)
995 error (0, errno, _("write error for `%s'"), output_filename);
996 output_stream = NULL;
997 cleanup_fatal ();
999 if (bytes_written == 0 && elide_empty_files)
1001 if (unlink (output_filename))
1002 error (0, errno, "%s", output_filename);
1003 files_created--;
1005 else
1007 /* FIXME: if we write to stdout here, we have to close stdout
1008 and check for errors. */
1009 if (!suppress_count)
1010 fprintf (stdout, "%d\n", bytes_written);
1012 output_stream = NULL;
1016 /* Save line LINE to the output file and
1017 increment the character count for the current file. */
1019 static void
1020 save_line_to_file (const struct cstring *line)
1022 fwrite (line->str, sizeof (char), line->len, output_stream);
1023 bytes_written += line->len;
1026 /* Return a new, initialized control record. */
1028 static struct control *
1029 new_control_record (void)
1031 static unsigned control_allocated = 0; /* Total space allocated. */
1032 struct control *p;
1034 if (control_allocated == 0)
1036 control_allocated = ALLOC_SIZE;
1037 controls = (struct control *)
1038 xmalloc (sizeof (struct control) * control_allocated);
1040 else if (control_used == control_allocated)
1042 control_allocated += ALLOC_SIZE;
1043 controls = (struct control *)
1044 xrealloc ((char *) controls,
1045 sizeof (struct control) * control_allocated);
1047 p = &controls[control_used++];
1048 p->regexpr = NULL;
1049 p->repeat = 0;
1050 p->repeat_forever = 0;
1051 p->lines_required = 0;
1052 p->offset = 0;
1053 return p;
1056 /* Check if there is a numeric offset after a regular expression.
1057 STR is the entire command line argument.
1058 P is the control record for this regular expression.
1059 NUM is the numeric part of STR. */
1061 static void
1062 check_for_offset (struct control *p, const char *str, const char *num)
1064 unsigned long val;
1066 if (*num != '-' && *num != '+')
1067 error (EXIT_FAILURE, 0, _("%s: `+' or `-' expected after delimeter"), str);
1069 if (xstrtoul (num + 1, NULL, 10, &val, "") != LONGINT_OK
1070 || val > UINT_MAX)
1071 error (EXIT_FAILURE, 0, _("%s: integer expected after `%c'"), str, *num);
1072 p->offset = (unsigned int) val;
1074 if (*num == '-')
1075 p->offset = -p->offset;
1078 /* Given that the first character of command line arg STR is '{',
1079 make sure that the rest of the string is a valid repeat count
1080 and store its value in P.
1081 ARGNUM is the ARGV index of STR. */
1083 static void
1084 parse_repeat_count (int argnum, struct control *p, char *str)
1086 unsigned long val;
1087 char *end;
1089 end = str + strlen (str) - 1;
1090 if (*end != '}')
1091 error (EXIT_FAILURE, 0, _("%s: `}' is required in repeat count"), str);
1092 *end = '\0';
1094 if (str+1 == end-1 && *(str+1) == '*')
1095 p->repeat_forever = 1;
1096 else
1098 if (xstrtoul (str + 1, NULL, 10, &val, "") != LONGINT_OK
1099 || val > UINT_MAX)
1101 error (EXIT_FAILURE, 0,
1102 _("%s}: integer required between `{' and `}'"),
1103 global_argv[argnum]);
1105 p->repeat = (unsigned int) val;
1108 *end = '}';
1111 /* Extract the regular expression from STR and check for a numeric offset.
1112 STR should start with the regexp delimiter character.
1113 Return a new control record for the regular expression.
1114 ARGNUM is the ARGV index of STR.
1115 Unless IGNORE is TRUE, mark these lines for output. */
1117 static struct control *
1118 extract_regexp (int argnum, boolean ignore, char *str)
1120 int len; /* Number of chars in this regexp. */
1121 char delim = *str;
1122 char *closing_delim;
1123 struct control *p;
1124 const char *err;
1126 closing_delim = strrchr (str + 1, delim);
1127 if (closing_delim == NULL)
1128 error (EXIT_FAILURE, 0,
1129 _("%s: closing delimeter `%c' missing"), str, delim);
1131 len = closing_delim - str - 1;
1132 p = new_control_record ();
1133 p->argnum = argnum;
1134 p->ignore = ignore;
1136 p->regexpr = (char *) xmalloc ((unsigned) (len + 1));
1137 strncpy (p->regexpr, str + 1, len);
1138 p->re_compiled.allocated = len * 2;
1139 p->re_compiled.buffer = (unsigned char *) xmalloc (p->re_compiled.allocated);
1140 p->re_compiled.fastmap = xmalloc (256);
1141 p->re_compiled.translate = 0;
1142 err = re_compile_pattern (p->regexpr, len, &p->re_compiled);
1143 if (err)
1145 error (0, 0, _("%s: invalid regular expression: %s"), str, err);
1146 cleanup_fatal ();
1149 if (closing_delim[1])
1150 check_for_offset (p, str, closing_delim + 1);
1152 return p;
1155 /* Extract the break patterns from args START through ARGC - 1 of ARGV.
1156 After each pattern, check if the next argument is a repeat count. */
1158 static void
1159 parse_patterns (int argc, int start, char **argv)
1161 int i; /* Index into ARGV. */
1162 struct control *p; /* New control record created. */
1163 unsigned long val;
1164 static unsigned long last_val = 0;
1166 for (i = start; i < argc; i++)
1168 if (*argv[i] == '/' || *argv[i] == '%')
1170 p = extract_regexp (i, *argv[i] == '%', argv[i]);
1172 else
1174 p = new_control_record ();
1175 p->argnum = i;
1177 if (xstrtoul (argv[i], NULL, 10, &val, "") != LONGINT_OK
1178 || val > INT_MAX)
1179 error (EXIT_FAILURE, 0, _("%s: invalid pattern"), argv[i]);
1180 if (val == 0)
1181 error (EXIT_FAILURE, 0,
1182 _("%s: line number must be greater than zero"),
1183 argv[i]);
1184 if (val < last_val)
1185 error (EXIT_FAILURE, 0,
1186 _("line number `%s' is smaller than preceding line number, %lu"),
1187 argv[i], last_val);
1189 if (val == last_val)
1190 error (0, 0,
1191 _("warning: line number `%s' is the same as preceding line number"),
1192 argv[i]);
1193 last_val = val;
1195 p->lines_required = (int) val;
1198 if (i + 1 < argc && *argv[i + 1] == '{')
1200 /* We have a repeat count. */
1201 i++;
1202 parse_repeat_count (i, p, argv[i]);
/* Step *FORMAT_PTR over a run of the flag characters `-', `+', ` '
   and `#', leaving it at the first character that is not one of them.
   Return the number of extra output characters the flags may cause:
   one for each `+' or ` ', two for each `#', none for `-'.  */

static unsigned
get_format_flags (char **format_ptr)
{
  unsigned extra = 0;
  char flag;

  while ((flag = **format_ptr) != '\0')
    {
      if (flag == '+' || flag == ' ')
        extra += 1;
      else if (flag == '#')
        extra += 2;     /* Allow for 0x prefix preceding an `x' conversion. */
      else if (flag != '-')
        break;

      (*format_ptr)++;
    }

  return extra;
}
1235 static unsigned
1236 get_format_width (char **format_ptr)
1238 unsigned count = 0;
1239 char *start;
1240 int ch_save;
1242 start = *format_ptr;
1243 for (; ISDIGIT (**format_ptr); (*format_ptr)++)
1244 continue;
1246 ch_save = **format_ptr;
1247 **format_ptr = '\0';
1248 /* In the case where no minimum field width is explicitly specified,
1249 allow for enough octal digits to represent the value of LONG_MAX. */
1250 count = ((*format_ptr == start)
1251 ? bytes_to_octal_digits[sizeof (long)]
1252 : atoi (start));
1253 **format_ptr = ch_save;
1254 return count;
/* If *FORMAT_PTR is at a `.', step over it, over an optional sign, and
   over the run of decimal digits that follows, and return the
   precision: the value of the digits, or 11 when there are none or the
   sign was `-'.  If there is no `.', return 0 and leave *FORMAT_PTR
   alone.  The character after the digits is replaced by NUL while the
   number is converted and restored afterwards.  */

static unsigned
get_format_prec (char **format_ptr)
{
  char *digits_begin;
  unsigned prec;
  int saved;
  int is_negative = 0;

  if (**format_ptr != '.')
    return 0;
  (*format_ptr)++;

  if (**format_ptr == '-' || **format_ptr == '+')
    {
      is_negative = (**format_ptr == '-');
      (*format_ptr)++;
    }

  digits_begin = *format_ptr;
  while (ISDIGIT (**format_ptr))
    (*format_ptr)++;

  /* ANSI 4.9.6.1 says that if the precision is negative, it's as good as
     not there.  Treat the digits as absent in that case.  */
  if (is_negative)
    digits_begin = *format_ptr;

  saved = **format_ptr;
  **format_ptr = '\0';

  if (*format_ptr == digits_begin)
    prec = 11;
  else
    prec = atoi (digits_begin);

  **format_ptr = saved;

  return prec;
}
/* Check the conversion character at *FORMAT_PTR and step *FORMAT_PTR
   past it.  Only the integer conversions `d', `i', `o', `u', `x' and
   `X' are accepted.  Anything else, including the end of the string,
   is reported through error with status EXIT_FAILURE.  */

static void
get_format_conv_type (char **format_ptr)
{
  /* Read the byte as unsigned char.  With a plain (possibly signed)
     char, a byte with the high bit set would reach ISPRINT as a
     negative value and would be printed by `%.3o' as a long
     sign-extended number instead of a three-digit escape.  */
  int ch = (unsigned char) *((*format_ptr)++);

  switch (ch)
    {
    case 'd':
    case 'i':
    case 'o':
    case 'u':
    case 'x':
    case 'X':
      break;

    case 0:
      error (EXIT_FAILURE, 0, _("missing conversion specifier in suffix"));
      break;

    default:
      if (ISPRINT (ch))
        error (EXIT_FAILURE, 0,
               _("invalid conversion specifier in suffix: %c"), ch);
      else
        error (EXIT_FAILURE, 0,
               _("invalid conversion specifier in suffix: \\%.3o"), ch);
    }
}
/* Return an estimate of the number of characters produced when FORMAT
   is used as a printf format with one integer argument: one for each
   ordinary character, plus the flag, width and precision allowances
   of the conversion specification.

   FORMAT must contain exactly one `%' conversion specification;
   otherwise the problem is reported through error with status
   EXIT_FAILURE.  */

static unsigned
max_out (char *format)
{
  unsigned length = 0;
  unsigned n_specs = 0;
  char *p = format;

  while (*p != '\0')
    {
      int field_width;
      int field_prec;

      if (*p++ != '%')
        {
          /* An ordinary character counts for itself.  */
          length++;
          continue;
        }

      /* P now addresses the text after the `%'; each helper consumes
         its own part of the specification, in this order.  */
      n_specs++;
      length += get_format_flags (&p);
      field_width = get_format_width (&p);
      field_prec = get_format_prec (&p);
      length += MAX (field_width, field_prec);
      get_format_conv_type (&p);
    }

  if (n_specs == 0)
    error (EXIT_FAILURE, 0,
           _("missing %% conversion specification in suffix"));
  if (n_specs > 1)
    error (EXIT_FAILURE, 0,
           _("too many %% conversion specifications in suffix"));

  return length;
}
1362 main (int argc, char **argv)
1364 int optc;
1365 unsigned long val;
1366 #ifdef SA_INTERRUPT
1367 struct sigaction oldact, newact;
1368 #endif
1370 program_name = argv[0];
1371 setlocale (LC_ALL, "");
1372 bindtextdomain (PACKAGE, LOCALEDIR);
1373 textdomain (PACKAGE);
1375 parse_long_options (argc, argv, "csplit", GNU_PACKAGE, VERSION,
1376 "Stuart Kemp and David MacKenzie", usage);
1378 global_argv = argv;
1379 controls = NULL;
1380 control_used = 0;
1381 suppress_count = FALSE;
1382 remove_files = TRUE;
1383 prefix = DEFAULT_PREFIX;
1385 /* Change the way xmalloc and xrealloc fail. */
1386 xalloc_fail_func = cleanup;
1388 #ifdef SA_INTERRUPT
1389 newact.sa_handler = interrupt_handler;
1390 sigemptyset (&newact.sa_mask);
1391 newact.sa_flags = 0;
1393 sigaction (SIGHUP, NULL, &oldact);
1394 if (oldact.sa_handler != SIG_IGN)
1395 sigaction (SIGHUP, &newact, NULL);
1397 sigaction (SIGINT, NULL, &oldact);
1398 if (oldact.sa_handler != SIG_IGN)
1399 sigaction (SIGINT, &newact, NULL);
1401 sigaction (SIGQUIT, NULL, &oldact);
1402 if (oldact.sa_handler != SIG_IGN)
1403 sigaction (SIGQUIT, &newact, NULL);
1405 sigaction (SIGTERM, NULL, &oldact);
1406 if (oldact.sa_handler != SIG_IGN)
1407 sigaction (SIGTERM, &newact, NULL);
1408 #else /* not SA_INTERRUPT */
1409 if (signal (SIGHUP, SIG_IGN) != SIG_IGN)
1410 signal (SIGHUP, interrupt_handler);
1411 if (signal (SIGINT, SIG_IGN) != SIG_IGN)
1412 signal (SIGINT, interrupt_handler);
1413 if (signal (SIGQUIT, SIG_IGN) != SIG_IGN)
1414 signal (SIGQUIT, interrupt_handler);
1415 if (signal (SIGTERM, SIG_IGN) != SIG_IGN)
1416 signal (SIGTERM, interrupt_handler);
1417 #endif /* not SA_INTERRUPT */
1419 while ((optc = getopt_long (argc, argv, "f:b:kn:sqz", longopts, NULL)) != -1)
1420 switch (optc)
1422 case 0:
1423 break;
1425 case 'f':
1426 prefix = optarg;
1427 break;
1429 case 'b':
1430 suffix = optarg;
1431 break;
1433 case 'k':
1434 remove_files = FALSE;
1435 break;
1437 case 'n':
1438 if (xstrtoul (optarg, NULL, 10, &val, "") != LONGINT_OK
1439 || val > INT_MAX)
1440 error (EXIT_FAILURE, 0, _("%s: invalid number"), optarg);
1441 digits = (int) val;
1442 break;
1444 case 's':
1445 case 'q':
1446 suppress_count = TRUE;
1447 break;
1449 case 'z':
1450 elide_empty_files = TRUE;
1451 break;
1453 default:
1454 usage (1);
1457 if (argc - optind < 2)
1459 error (0, 0, _("too few arguments"));
1460 usage (1);
1463 if (suffix)
1464 filename_space = (char *) xmalloc (strlen (prefix) + max_out (suffix) + 2);
1465 else
1466 filename_space = (char *) xmalloc (strlen (prefix) + digits + 2);
1468 set_input_file (argv[optind++]);
1470 parse_patterns (argc, optind, argv);
1472 split_file ();
1474 if (close (input_desc) < 0)
1476 error (0, errno, _("read error"));
1477 cleanup_fatal ();
1480 if (!suppress_count && (ferror (stdout) || fclose (stdout) == EOF))
1481 error (EXIT_FAILURE, errno, _("write error"));
1483 exit (EXIT_SUCCESS);
/* Print usage information and exit.
   If STATUS is nonzero, write a one-line hint naming `--help' to
   standard error and exit with EXIT_FAILURE.  If STATUS is zero, print
   the full help text to standard output and exit with EXIT_SUCCESS.
   This function does not return.  */
1486 void
1487 usage (int status)
/* Failure case: only the short hint.  */
1489 if (status != 0)
1490 fprintf (stderr, _("Try `%s --help' for more information.\n"),
1491 program_name);
1492 else
/* The synopsis is printed on its own because it takes program_name as
   a format argument; the option and pattern summary that follows takes
   none, which is why it spells a literal percent sign as `%%'.  */
1494 printf (_("\
1495 Usage: %s [OPTION]... FILE PATTERN...\n\
1497 program_name);
1498 printf (_("\
1499 Output pieces of FILE separated by PATTERN(s) to files `xx01', `xx02', ...,\n\
1500 and output byte counts of each piece to standard output.\n\
1502 -b, --suffix-format=FORMAT use sprintf FORMAT instead of %%d\n\
1503 -f, --prefix=PREFIX use PREFIX instead of `xx'\n\
1504 -k, --keep-files do not remove output files on errors\n\
1505 -n, --digits=DIGITS use specified number of digits instead of 2\n\
1506 -s, --quiet, --silent do not print counts of output file sizes\n\
1507 -z, --elide-empty-files remove empty output files\n\
1508 --help display this help and exit\n\
1509 --version output version information and exit\n\
1511 Read standard input if FILE is -. Each PATTERN may be:\n\
1513 INTEGER copy up to but not including specified line number\n\
1514 /REGEXP/[OFFSET] copy up to but not including a matching line\n\
1515 %%REGEXP%%[OFFSET] skip to, but not including a matching line\n\
1516 {INTEGER} repeat the previous pattern specified number of times\n\
1517 {*} repeat the previous pattern as many times as possible\n\
1519 A line OFFSET is a required `+' or `-' followed by a positive integer.\n\
1520 "));
1521 puts (_("\nReport bugs to <bug-textutils@gnu.org>."));
/* Map STATUS onto the two standard exit codes.  */
1523 exit (status == 0 ? EXIT_SUCCESS : EXIT_FAILURE);