*** empty log message ***
[coreutils.git] / src / csplit.c
blobfe007eb5f2329660afd57ccc23bb4f9bdf383607
/* csplit - split a file into sections determined by context lines
   Copyright (C) 91, 95, 96, 1997, 1998 Free Software Foundation, Inc.

   This program is free software; you can redistribute it and/or modify
   it under the terms of the GNU General Public License as published by
   the Free Software Foundation; either version 2, or (at your option)
   any later version.

   This program is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
   GNU General Public License for more details.

   You should have received a copy of the GNU General Public License
   along with this program; if not, write to the Free Software Foundation,
   Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.  */

/* Written by Stuart Kemp, cpsrk@groper.jcu.edu.au.
   Modified by David MacKenzie, djm@gnu.ai.mit.edu. */
#include <config.h>

#include <stdio.h>
#include <string.h>
#include <assert.h>
#include <getopt.h>
#include <sys/types.h>
#include <signal.h>

#include "system.h"

#include <regex.h>

#include "error.h"
#include "xstrtoul.h"
#include "xalloc.h"
#include "safe-read.h"

#ifdef STDC_HEADERS
# include <stdlib.h>
#endif
/* Larger of A and B.  Each argument may be evaluated twice, so do not
   pass expressions with side effects.  */
#ifndef MAX
# define MAX(a,b) (((a) > (b)) ? (a) : (b))
#endif

/* Truth values used with the `boolean' type below.  */
#ifndef TRUE
# define FALSE 0
# define TRUE 1
#endif

/* Increment size of area for control records. */
#define ALLOC_SIZE 20

/* The default prefix for output file names. */
#define DEFAULT_PREFIX "xx"

/* Holds TRUE or FALSE.  */
typedef int boolean;
59 /* A compiled pattern arg. */
60 struct control
62 char *regexpr; /* Non-compiled regular expression. */
63 struct re_pattern_buffer re_compiled; /* Compiled regular expression. */
64 int offset; /* Offset from regexp to split at. */
65 int lines_required; /* Number of lines required. */
66 unsigned int repeat; /* Repeat count. */
67 int repeat_forever; /* Non-zero if `*' used as a repeat count. */
68 int argnum; /* ARGV index. */
69 boolean ignore; /* If true, produce no output (for regexp). */
/* Buffer tuning constants.  The DEBUG values are deliberately tiny so
   that the buffer-growing and record-chaining paths are exercised.
   Exactly one set is defined, so the macros are never redefined.  */
#ifdef DEBUG

/* Some small values to test the algorithms. */
# define START_SIZE 200
# define INCR_SIZE 10
# define CTRL_SIZE 1

#else /* !DEBUG */

/* Initial size of data area in buffers. */
# define START_SIZE 8191

/* Increment size for data area. */
# define INCR_SIZE 2048

/* Number of lines kept in each node in line list. */
# define CTRL_SIZE 80

#endif /* !DEBUG */
/* A string with a length count.  STR is not assumed to be
   NUL-terminated; LEN gives the number of bytes that belong to it.  */
struct cstring
{
  int len;
  char *str;
};
95 /* Pointers to the beginnings of lines in the buffer area.
96 These structures are linked together if needed. */
97 struct line
99 unsigned used; /* Number of offsets used in this struct. */
100 unsigned insert_index; /* Next offset to use when inserting line. */
101 unsigned retrieve_index; /* Next index to use when retrieving line. */
102 struct cstring starts[CTRL_SIZE]; /* Lines in the data area. */
103 struct line *next; /* Next in linked list. */
/* The structure to hold the input lines.
   Contains a pointer to the data area and a list containing
   pointers to the individual lines. */
struct buffer_record
{
  unsigned bytes_alloc;		/* Size of the buffer area. */
  unsigned bytes_used;		/* Bytes used in the buffer area. */
  unsigned start_line;		/* First line number in this buffer. */
  unsigned first_available;	/* First line that can be retrieved. */
  unsigned num_lines;		/* Number of complete lines in this buffer. */
  char *buffer;			/* Data area. */
  struct line *line_start;	/* Head of list of pointers to lines. */
  struct line *curr_line;	/* The line start record currently in use. */
  struct buffer_record *next;	/* Next buffer in whichever list holds it. */
};
122 static void close_output_file PARAMS ((void));
123 static void create_output_file PARAMS ((void));
124 static void delete_all_files PARAMS ((void));
125 static void save_line_to_file PARAMS ((const struct cstring *line));
126 static void usage PARAMS ((int status));
128 /* The name this program was run with. */
129 char *program_name;
131 /* Convert the number of 8-bit bytes of a binary representation to
132 the number of characters required to represent the same quantity
133 as an unsigned octal. For example, a 32-bit (4-byte) quantity may
134 require a field width as wide as 11 characters. */
135 static const unsigned int bytes_to_octal_digits[] =
136 {0, 3, 6, 8, 11, 14, 16, 19, 22, 25, 27, 30, 32, 35, 38, 41, 43};
138 /* Input file descriptor. */
139 static int input_desc = 0;
141 /* List of available buffers. */
142 static struct buffer_record *free_list = NULL;
144 /* Start of buffer list. */
145 static struct buffer_record *head = NULL;
147 /* Partially read line. */
148 static char *hold_area = NULL;
150 /* Number of chars in `hold_area'. */
151 static unsigned hold_count = 0;
153 /* Number of the last line in the buffers. */
154 static unsigned last_line_number = 0;
156 /* Number of the line currently being examined. */
157 static unsigned current_line = 0;
159 /* If TRUE, we have read EOF. */
160 static boolean have_read_eof = FALSE;
162 /* Name of output files. */
163 static char *filename_space = NULL;
165 /* Prefix part of output file names. */
166 static char *prefix = NULL;
168 /* Suffix part of output file names. */
169 static char *suffix = NULL;
171 /* Number of digits to use in output file names. */
172 static int digits = 2;
174 /* Number of files created so far. */
175 static unsigned int files_created = 0;
177 /* Number of bytes written to current file. */
178 static unsigned int bytes_written;
180 /* Output file pointer. */
181 static FILE *output_stream = NULL;
183 /* Output file name. */
184 static char *output_filename = NULL;
186 /* Perhaps it would be cleaner to pass arg values instead of indexes. */
187 static char **global_argv;
189 /* If TRUE, do not print the count of bytes in each output file. */
190 static boolean suppress_count;
192 /* If TRUE, remove output files on error. */
193 static boolean remove_files;
195 /* If TRUE, remove all output files which have a zero length. */
196 static boolean elide_empty_files;
198 /* The compiled pattern arguments, which determine how to split
199 the input file. */
200 static struct control *controls;
202 /* Number of elements in `controls'. */
203 static unsigned int control_used;
205 /* If nonzero, display usage information and exit. */
206 static int show_help;
208 /* If nonzero, print the version on standard output then exit. */
209 static int show_version;
211 static struct option const longopts[] =
213 {"digits", required_argument, NULL, 'n'},
214 {"quiet", no_argument, NULL, 'q'},
215 {"silent", no_argument, NULL, 's'},
216 {"keep-files", no_argument, NULL, 'k'},
217 {"elide-empty-files", no_argument, NULL, 'z'},
218 {"prefix", required_argument, NULL, 'f'},
219 {"suffix-format", required_argument, NULL, 'b'},
220 {"help", no_argument, &show_help, 1},
221 {"version", no_argument, &show_version, 1},
222 {NULL, 0, NULL, 0}
225 /* Optionally remove files created so far; then exit.
226 Called when an error detected. */
228 static void
229 cleanup (void)
231 if (output_stream)
232 close_output_file ();
234 if (remove_files)
235 delete_all_files ();
/* Run cleanup, then terminate the program with a failure status.
   Does not return.  */

static void
cleanup_fatal (void)
{
  cleanup ();
  exit (EXIT_FAILURE);
}
245 static RETSIGTYPE
246 interrupt_handler (int sig)
248 #ifdef SA_INTERRUPT
249 struct sigaction sigact;
251 sigact.sa_handler = SIG_DFL;
252 sigemptyset (&sigact.sa_mask);
253 sigact.sa_flags = 0;
254 sigaction (sig, &sigact, NULL);
255 #else /* !SA_INTERRUPT */
256 signal (sig, SIG_DFL);
257 #endif /* SA_INTERRUPT */
258 cleanup ();
259 kill (getpid (), sig);
262 /* Keep track of NUM chars of a partial line in buffer START.
263 These chars will be retrieved later when another large buffer is read.
264 It is not necessary to create a new buffer for these chars; instead,
265 we keep a pointer to the existing buffer. This buffer *is* on the
266 free list, and when the next buffer is obtained from this list
267 (even if it is this one), these chars will be placed at the
268 start of the new buffer. */
270 static void
271 save_to_hold_area (char *start, unsigned int num)
273 hold_area = start;
274 hold_count = num;
277 /* Read up to MAX_N_BYTES chars from the input stream into DEST.
278 Return the number of chars read. */
280 static int
281 read_input (char *dest, unsigned int max_n_bytes)
283 int bytes_read;
285 if (max_n_bytes == 0)
286 return 0;
288 bytes_read = safe_read (input_desc, dest, max_n_bytes);
290 if (bytes_read == 0)
291 have_read_eof = TRUE;
293 if (bytes_read < 0)
295 error (0, errno, _("read error"));
296 cleanup_fatal ();
299 return bytes_read;
302 /* Initialize existing line record P. */
304 static void
305 clear_line_control (struct line *p)
307 p->used = 0;
308 p->insert_index = 0;
309 p->retrieve_index = 0;
312 /* Initialize all line records in B. */
314 static void
315 clear_all_line_control (struct buffer_record *b)
317 struct line *l;
319 for (l = b->line_start; l; l = l->next)
320 clear_line_control (l);
323 /* Return a new, initialized line record. */
325 static struct line *
326 new_line_control (void)
328 struct line *p;
330 p = (struct line *) xmalloc (sizeof (struct line));
332 p->next = NULL;
333 clear_line_control (p);
335 return p;
338 /* Record LINE_START, which is the address of the start of a line
339 of length LINE_LEN in the large buffer, in the lines buffer of B. */
341 static void
342 keep_new_line (struct buffer_record *b, char *line_start, int line_len)
344 struct line *l;
346 /* If there is no existing area to keep line info, get some. */
347 if (b->line_start == NULL)
348 b->line_start = b->curr_line = new_line_control ();
350 /* If existing area for lines is full, get more. */
351 if (b->curr_line->used == CTRL_SIZE)
353 b->curr_line->next = new_line_control ();
354 b->curr_line = b->curr_line->next;
357 l = b->curr_line;
359 /* Record the start of the line, and update counters. */
360 l->starts[l->insert_index].str = line_start;
361 l->starts[l->insert_index].len = line_len;
362 l->used++;
363 l->insert_index++;
366 /* Scan the buffer in B for newline characters
367 and record the line start locations and lengths in B.
368 Return the number of lines found in this buffer.
370 There may be an incomplete line at the end of the buffer;
371 a pointer is kept to this area, which will be used when
372 the next buffer is filled. */
374 static unsigned int
375 record_line_starts (struct buffer_record *b)
377 char *line_start; /* Start of current line. */
378 char *line_end; /* End of each line found. */
379 unsigned int bytes_left; /* Length of incomplete last line. */
380 unsigned int lines; /* Number of lines found. */
381 unsigned int line_length; /* Length of each line found. */
383 if (b->bytes_used == 0)
384 return 0;
386 lines = 0;
387 line_start = b->buffer;
388 bytes_left = b->bytes_used;
390 for (;;)
392 line_end = memchr (line_start, '\n', bytes_left);
393 if (line_end == NULL)
394 break;
395 line_length = line_end - line_start + 1;
396 keep_new_line (b, line_start, line_length);
397 bytes_left -= line_length;
398 line_start = line_end + 1;
399 lines++;
402 /* Check for an incomplete last line. */
403 if (bytes_left)
405 if (have_read_eof)
407 keep_new_line (b, line_start, bytes_left);
408 lines++;
410 else
411 save_to_hold_area (line_start, bytes_left);
414 b->num_lines = lines;
415 b->first_available = b->start_line = last_line_number + 1;
416 last_line_number += lines;
418 return lines;
421 /* Return a new buffer with room to store SIZE bytes, plus
422 an extra byte for safety. */
424 static struct buffer_record *
425 create_new_buffer (unsigned int size)
427 struct buffer_record *new_buffer;
429 new_buffer = (struct buffer_record *)
430 xmalloc (sizeof (struct buffer_record));
432 new_buffer->buffer = (char *) xmalloc (size + 1);
434 new_buffer->bytes_alloc = size;
435 new_buffer->line_start = new_buffer->curr_line = NULL;
437 return new_buffer;
440 /* Return a new buffer of at least MINSIZE bytes. If a buffer of at
441 least that size is currently free, use it, otherwise create a new one. */
443 static struct buffer_record *
444 get_new_buffer (unsigned int min_size)
446 struct buffer_record *p, *q;
447 struct buffer_record *new_buffer; /* Buffer to return. */
448 unsigned int alloc_size; /* Actual size that will be requested. */
450 alloc_size = START_SIZE;
451 while (min_size > alloc_size)
452 alloc_size += INCR_SIZE;
454 if (free_list == NULL)
455 new_buffer = create_new_buffer (alloc_size);
456 else
458 /* Use first-fit to find a buffer. */
459 p = new_buffer = NULL;
460 q = free_list;
464 if (q->bytes_alloc >= min_size)
466 if (p == NULL)
467 free_list = q->next;
468 else
469 p->next = q->next;
470 break;
472 p = q;
473 q = q->next;
475 while (q);
477 new_buffer = (q ? q : create_new_buffer (alloc_size));
479 new_buffer->curr_line = new_buffer->line_start;
480 clear_all_line_control (new_buffer);
483 new_buffer->num_lines = 0;
484 new_buffer->bytes_used = 0;
485 new_buffer->start_line = new_buffer->first_available = last_line_number + 1;
486 new_buffer->next = NULL;
488 return new_buffer;
491 /* Add buffer BUF to the list of free buffers. */
493 static void
494 free_buffer (struct buffer_record *buf)
496 buf->next = free_list;
497 free_list = buf;
500 /* Append buffer BUF to the linked list of buffers that contain
501 some data yet to be processed. */
503 static void
504 save_buffer (struct buffer_record *buf)
506 struct buffer_record *p;
508 buf->next = NULL;
509 buf->curr_line = buf->line_start;
511 if (head == NULL)
512 head = buf;
513 else
515 for (p = head; p->next; p = p->next)
516 /* Do nothing. */ ;
517 p->next = buf;
521 /* Fill a buffer of input.
523 Set the initial size of the buffer to a default.
524 Fill the buffer (from the hold area and input stream)
525 and find the individual lines.
526 If no lines are found (the buffer is too small to hold the next line),
527 release the current buffer (whose contents would have been put in the
528 hold area) and repeat the process with another large buffer until at least
529 one entire line has been read.
531 Return TRUE if a new buffer was obtained, otherwise false
532 (in which case end-of-file must have been encountered). */
534 static boolean
535 load_buffer (void)
537 struct buffer_record *b;
538 unsigned int bytes_wanted = START_SIZE; /* Minimum buffer size. */
539 unsigned int bytes_avail; /* Size of new buffer created. */
540 unsigned int lines_found; /* Number of lines in this new buffer. */
541 char *p; /* Place to load into buffer. */
543 if (have_read_eof)
544 return FALSE;
546 /* We must make the buffer at least as large as the amount of data
547 in the partial line left over from the last call. */
548 if (bytes_wanted < hold_count)
549 bytes_wanted = hold_count;
553 b = get_new_buffer (bytes_wanted);
554 bytes_avail = b->bytes_alloc; /* Size of buffer returned. */
555 p = b->buffer;
557 /* First check the `holding' area for a partial line. */
558 if (hold_count)
560 if (p != hold_area)
561 memcpy (p, hold_area, hold_count);
562 p += hold_count;
563 b->bytes_used += hold_count;
564 bytes_avail -= hold_count;
565 hold_count = 0;
568 b->bytes_used += (unsigned int) read_input (p, bytes_avail);
570 lines_found = record_line_starts (b);
571 bytes_wanted = b->bytes_alloc * 2;
572 if (!lines_found)
573 free_buffer (b);
575 while (!lines_found && !have_read_eof);
577 if (lines_found)
578 save_buffer (b);
580 return lines_found != 0;
583 /* Return the line number of the first line that has not yet been retrieved. */
585 static unsigned int
586 get_first_line_in_buffer (void)
588 if (head == NULL && !load_buffer ())
589 error (EXIT_FAILURE, errno, _("input disappeared"));
591 return head->first_available;
594 /* Return a pointer to the logical first line in the buffer and make the
595 next line the logical first line.
596 Return NULL if there is no more input. */
598 static struct cstring *
599 remove_line (void)
601 struct cstring *line; /* Return value. */
602 struct line *l; /* For convenience. */
604 if (head == NULL && !load_buffer ())
605 return NULL;
607 if (current_line < head->first_available)
608 current_line = head->first_available;
610 ++(head->first_available);
612 l = head->curr_line;
614 line = &l->starts[l->retrieve_index];
616 /* Advance index to next line. */
617 if (++l->retrieve_index == l->used)
619 /* Go on to the next line record. */
620 head->curr_line = l->next;
621 if (head->curr_line == NULL || head->curr_line->used == 0)
623 /* Go on to the next data block. */
624 struct buffer_record *b = head;
625 head = head->next;
626 free_buffer (b);
630 return line;
633 /* Search the buffers for line LINENUM, reading more input if necessary.
634 Return a pointer to the line, or NULL if it is not found in the file. */
636 static struct cstring *
637 find_line (unsigned int linenum)
639 struct buffer_record *b;
641 if (head == NULL && !load_buffer ())
642 return NULL;
644 if (linenum < head->start_line)
645 return NULL;
647 for (b = head;;)
649 if (linenum < b->start_line + b->num_lines)
651 /* The line is in this buffer. */
652 struct line *l;
653 unsigned int offset; /* How far into the buffer the line is. */
655 l = b->line_start;
656 offset = linenum - b->start_line;
657 /* Find the control record. */
658 while (offset >= CTRL_SIZE)
660 l = l->next;
661 offset -= CTRL_SIZE;
663 return &l->starts[offset];
665 if (b->next == NULL && !load_buffer ())
666 return NULL;
667 b = b->next; /* Try the next data block. */
671 /* Return TRUE if at least one more line is available for input. */
673 static boolean
674 no_more_lines (void)
676 return (find_line (current_line + 1) == NULL) ? TRUE : FALSE;
679 /* Set the name of the input file to NAME and open it. */
681 static void
682 set_input_file (const char *name)
684 if (STREQ (name, "-"))
685 input_desc = 0;
686 else
688 input_desc = open (name, O_RDONLY);
689 if (input_desc < 0)
690 error (EXIT_FAILURE, errno, "%s", name);
694 /* Write all lines from the beginning of the buffer up to, but
695 not including, line LAST_LINE, to the current output file.
696 If IGNORE is TRUE, do not output lines selected here.
697 ARGNUM is the index in ARGV of the current pattern. */
699 static void
700 write_to_file (unsigned int last_line, boolean ignore, int argnum)
702 struct cstring *line;
703 unsigned int first_line; /* First available input line. */
704 unsigned int lines; /* Number of lines to output. */
705 unsigned int i;
707 first_line = get_first_line_in_buffer ();
709 if (first_line > last_line)
711 error (0, 0, _("%s: line number out of range"), global_argv[argnum]);
712 cleanup_fatal ();
715 lines = last_line - first_line;
717 for (i = 0; i < lines; i++)
719 line = remove_line ();
720 if (line == NULL)
722 error (0, 0, _("%s: line number out of range"), global_argv[argnum]);
723 cleanup_fatal ();
725 if (!ignore)
726 save_line_to_file (line);
/* Output any lines left after all regexps have been processed:
   remove lines one by one and save each to the current output file
   until no input remains.  */

static void
dump_rest_of_file (void)
{
  struct cstring *line;

  while ((line = remove_line ()) != NULL)
    save_line_to_file (line);
}
741 /* Handle an attempt to read beyond EOF under the control of record P,
742 on iteration REPETITION if nonzero. */
744 static void
745 handle_line_error (const struct control *p, int repetition)
747 fprintf (stderr, _("%s: `%d': line number out of range"),
748 program_name, p->lines_required);
749 if (repetition)
750 fprintf (stderr, _(" on repetition %d\n"), repetition);
751 else
752 fprintf (stderr, "\n");
754 cleanup_fatal ();
757 /* Determine the line number that marks the end of this file,
758 then get those lines and save them to the output file.
759 P is the control record.
760 REPETITION is the repetition number. */
762 static void
763 process_line_count (const struct control *p, int repetition)
765 unsigned int linenum;
766 unsigned int last_line_to_save = p->lines_required * (repetition + 1);
767 struct cstring *line;
769 create_output_file ();
771 linenum = get_first_line_in_buffer ();
773 /* Initially, I wanted to assert linenum < last_line_to_save, but that
774 condition is false for the valid command: echo | csplit - 1 '{*}'.
775 So, relax it just a little. */
776 assert ((linenum == 1 && last_line_to_save == 1)
777 || linenum < last_line_to_save);
779 while (linenum++ < last_line_to_save)
781 line = remove_line ();
782 if (line == NULL)
783 handle_line_error (p, repetition);
784 save_line_to_file (line);
787 close_output_file ();
789 /* Ensure that the line number specified is not 1 greater than
790 the number of lines in the file. */
791 if (no_more_lines ())
792 handle_line_error (p, repetition);
795 static void
796 regexp_error (struct control *p, int repetition, boolean ignore)
798 fprintf (stderr, _("%s: `%s': match not found"),
799 program_name, global_argv[p->argnum]);
801 if (repetition)
802 fprintf (stderr, _(" on repetition %d\n"), repetition);
803 else
804 fprintf (stderr, "\n");
806 if (!ignore)
808 dump_rest_of_file ();
809 close_output_file ();
811 cleanup_fatal ();
814 /* Read the input until a line matches the regexp in P, outputting
815 it unless P->IGNORE is TRUE.
816 REPETITION is this repeat-count; 0 means the first time. */
818 static void
819 process_regexp (struct control *p, int repetition)
821 struct cstring *line; /* From input file. */
822 unsigned int line_len; /* To make "$" in regexps work. */
823 unsigned int break_line; /* First line number of next file. */
824 boolean ignore = p->ignore; /* If TRUE, skip this section. */
825 int ret;
827 if (!ignore)
828 create_output_file ();
830 /* If there is no offset for the regular expression, or
831 it is positive, then it is not necessary to buffer the lines. */
833 if (p->offset >= 0)
835 for (;;)
837 line = find_line (++current_line);
838 if (line == NULL)
840 if (p->repeat_forever)
842 if (!ignore)
844 dump_rest_of_file ();
845 close_output_file ();
847 exit (EXIT_SUCCESS);
849 else
850 regexp_error (p, repetition, ignore);
852 line_len = line->len;
853 if (line->str[line_len - 1] == '\n')
854 line_len--;
855 ret = re_search (&p->re_compiled, line->str, line_len,
856 0, line_len, (struct re_registers *) 0);
857 if (ret == -2)
859 error (0, 0, _("error in regular expression search"));
860 cleanup_fatal ();
862 if (ret == -1)
864 line = remove_line ();
865 if (!ignore)
866 save_line_to_file (line);
868 else
869 break;
872 else
874 /* Buffer the lines. */
875 for (;;)
877 line = find_line (++current_line);
878 if (line == NULL)
880 if (p->repeat_forever)
882 if (!ignore)
884 dump_rest_of_file ();
885 close_output_file ();
887 exit (EXIT_SUCCESS);
889 else
890 regexp_error (p, repetition, ignore);
892 line_len = line->len;
893 if (line->str[line_len - 1] == '\n')
894 line_len--;
895 ret = re_search (&p->re_compiled, line->str, line_len,
896 0, line_len, (struct re_registers *) 0);
897 if (ret == -2)
899 error (0, 0, _("error in regular expression search"));
900 cleanup_fatal ();
902 if (ret >= 0)
903 break;
907 /* Account for any offset from this regexp. */
908 break_line = current_line + p->offset;
910 write_to_file (break_line, ignore, p->argnum);
912 if (!ignore)
913 close_output_file ();
915 if (p->offset > 0)
916 current_line = break_line;
919 /* Split the input file according to the control records we have built. */
921 static void
922 split_file (void)
924 unsigned int i, j;
926 for (i = 0; i < control_used; i++)
928 if (controls[i].regexpr)
930 for (j = 0; (controls[i].repeat_forever
931 || j <= controls[i].repeat); j++)
932 process_regexp (&controls[i], j);
934 else
936 for (j = 0; (controls[i].repeat_forever
937 || j <= controls[i].repeat); j++)
938 process_line_count (&controls[i], j);
942 create_output_file ();
943 dump_rest_of_file ();
944 close_output_file ();
947 /* Return the name of output file number NUM. */
949 static char *
950 make_filename (unsigned int num)
952 strcpy (filename_space, prefix);
953 if (suffix)
954 sprintf (filename_space+strlen(prefix), suffix, num);
955 else
956 sprintf (filename_space+strlen(prefix), "%0*d", digits, num);
957 return filename_space;
960 /* Create the next output file. */
962 static void
963 create_output_file (void)
965 output_filename = make_filename (files_created);
966 output_stream = fopen (output_filename, "w");
967 if (output_stream == NULL)
969 error (0, errno, "%s", output_filename);
970 cleanup_fatal ();
972 files_created++;
973 bytes_written = 0;
976 /* Delete all the files we have created. */
978 static void
979 delete_all_files (void)
981 unsigned int i;
982 char *name;
984 for (i = 0; i < files_created; i++)
986 name = make_filename (i);
987 if (unlink (name))
988 error (0, errno, "%s", name);
992 /* Close the current output file and print the count
993 of characters in this file. */
995 static void
996 close_output_file (void)
998 if (output_stream)
1000 if (ferror (output_stream) || fclose (output_stream) == EOF)
1002 error (0, errno, _("write error for `%s'"), output_filename);
1003 output_stream = NULL;
1004 cleanup_fatal ();
1006 if (bytes_written == 0 && elide_empty_files)
1008 if (unlink (output_filename))
1009 error (0, errno, "%s", output_filename);
1010 files_created--;
1012 else
1014 /* FIXME: if we write to stdout here, we have to close stdout
1015 and check for errors. */
1016 if (!suppress_count)
1017 fprintf (stdout, "%d\n", bytes_written);
1019 output_stream = NULL;
1023 /* Save line LINE to the output file and
1024 increment the character count for the current file. */
1026 static void
1027 save_line_to_file (const struct cstring *line)
1029 fwrite (line->str, sizeof (char), line->len, output_stream);
1030 bytes_written += line->len;
1033 /* Return a new, initialized control record. */
1035 static struct control *
1036 new_control_record (void)
1038 static unsigned control_allocated = 0; /* Total space allocated. */
1039 struct control *p;
1041 if (control_allocated == 0)
1043 control_allocated = ALLOC_SIZE;
1044 controls = (struct control *)
1045 xmalloc (sizeof (struct control) * control_allocated);
1047 else if (control_used == control_allocated)
1049 control_allocated += ALLOC_SIZE;
1050 controls = (struct control *)
1051 xrealloc ((char *) controls,
1052 sizeof (struct control) * control_allocated);
1054 p = &controls[control_used++];
1055 p->regexpr = NULL;
1056 p->repeat = 0;
1057 p->repeat_forever = 0;
1058 p->lines_required = 0;
1059 p->offset = 0;
1060 return p;
1063 /* Check if there is a numeric offset after a regular expression.
1064 STR is the entire command line argument.
1065 P is the control record for this regular expression.
1066 NUM is the numeric part of STR. */
1068 static void
1069 check_for_offset (struct control *p, const char *str, const char *num)
1071 unsigned long val;
1073 if (*num != '-' && *num != '+')
1074 error (EXIT_FAILURE, 0, _("%s: `+' or `-' expected after delimeter"), str);
1076 if (xstrtoul (num + 1, NULL, 10, &val, "") != LONGINT_OK
1077 || val > UINT_MAX)
1078 error (EXIT_FAILURE, 0, _("%s: integer expected after `%c'"), str, *num);
1079 p->offset = (unsigned int) val;
1081 if (*num == '-')
1082 p->offset = -p->offset;
1085 /* Given that the first character of command line arg STR is '{',
1086 make sure that the rest of the string is a valid repeat count
1087 and store its value in P.
1088 ARGNUM is the ARGV index of STR. */
1090 static void
1091 parse_repeat_count (int argnum, struct control *p, char *str)
1093 unsigned long val;
1094 char *end;
1096 end = str + strlen (str) - 1;
1097 if (*end != '}')
1098 error (EXIT_FAILURE, 0, _("%s: `}' is required in repeat count"), str);
1099 *end = '\0';
1101 if (str+1 == end-1 && *(str+1) == '*')
1102 p->repeat_forever = 1;
1103 else
1105 if (xstrtoul (str + 1, NULL, 10, &val, "") != LONGINT_OK
1106 || val > UINT_MAX)
1108 error (EXIT_FAILURE, 0,
1109 _("%s}: integer required between `{' and `}'"),
1110 global_argv[argnum]);
1112 p->repeat = (unsigned int) val;
1115 *end = '}';
1118 /* Extract the regular expression from STR and check for a numeric offset.
1119 STR should start with the regexp delimiter character.
1120 Return a new control record for the regular expression.
1121 ARGNUM is the ARGV index of STR.
1122 Unless IGNORE is TRUE, mark these lines for output. */
1124 static struct control *
1125 extract_regexp (int argnum, boolean ignore, char *str)
1127 int len; /* Number of chars in this regexp. */
1128 char delim = *str;
1129 char *closing_delim;
1130 struct control *p;
1131 const char *err;
1133 closing_delim = strrchr (str + 1, delim);
1134 if (closing_delim == NULL)
1135 error (EXIT_FAILURE, 0,
1136 _("%s: closing delimeter `%c' missing"), str, delim);
1138 len = closing_delim - str - 1;
1139 p = new_control_record ();
1140 p->argnum = argnum;
1141 p->ignore = ignore;
1143 p->regexpr = (char *) xmalloc ((unsigned) (len + 1));
1144 strncpy (p->regexpr, str + 1, len);
1145 p->re_compiled.allocated = len * 2;
1146 p->re_compiled.buffer = (unsigned char *) xmalloc (p->re_compiled.allocated);
1147 p->re_compiled.fastmap = xmalloc (256);
1148 p->re_compiled.translate = 0;
1149 err = re_compile_pattern (p->regexpr, len, &p->re_compiled);
1150 if (err)
1152 error (0, 0, _("%s: invalid regular expression: %s"), str, err);
1153 cleanup_fatal ();
1156 if (closing_delim[1])
1157 check_for_offset (p, str, closing_delim + 1);
1159 return p;
1162 /* Extract the break patterns from args START through ARGC - 1 of ARGV.
1163 After each pattern, check if the next argument is a repeat count. */
1165 static void
1166 parse_patterns (int argc, int start, char **argv)
1168 int i; /* Index into ARGV. */
1169 struct control *p; /* New control record created. */
1170 unsigned long val;
1171 static unsigned long last_val = 0;
1173 for (i = start; i < argc; i++)
1175 if (*argv[i] == '/' || *argv[i] == '%')
1177 p = extract_regexp (i, *argv[i] == '%', argv[i]);
1179 else
1181 p = new_control_record ();
1182 p->argnum = i;
1184 if (xstrtoul (argv[i], NULL, 10, &val, "") != LONGINT_OK
1185 || val > INT_MAX)
1186 error (EXIT_FAILURE, 0, _("%s: invalid pattern"), argv[i]);
1187 if (val == 0)
1188 error (EXIT_FAILURE, 0,
1189 _("%s: line number must be greater than zero"),
1190 argv[i]);
1191 if (val < last_val)
1192 error (EXIT_FAILURE, 0,
1193 _("line number `%s' is smaller than preceding line number, %lu"),
1194 argv[i], last_val);
1196 if (val == last_val)
1197 error (0, 0,
1198 _("warning: line number `%s' is the same as preceding line number"),
1199 argv[i]);
1200 last_val = val;
1202 p->lines_required = (int) val;
1205 if (i + 1 < argc && *argv[i + 1] == '{')
1207 /* We have a repeat count. */
1208 i++;
1209 parse_repeat_count (i, p, argv[i]);
/* Skip over the printf flag characters at *FORMAT_PTR, leaving
   *FORMAT_PTR at the first character that is not one of `-', `+',
   ` ' or `#'.  Return the number of extra output characters those
   flags may produce: one for each `+' or ` ', two for each `#'.  */

static unsigned
get_format_flags (char **format_ptr)
{
  unsigned count = 0;

  for (; **format_ptr; (*format_ptr)++)
    {
      switch (**format_ptr)
	{
	case '-':
	  break;

	case '+':
	case ' ':
	  count++;
	  break;

	case '#':
	  count += 2;	 /* Allow for 0x prefix preceeding an `x' conversion.  */
	  break;

	default:
	  return count;
	}
    }
  return count;
}
1242 static unsigned
1243 get_format_width (char **format_ptr)
1245 unsigned count = 0;
1246 char *start;
1247 int ch_save;
1249 start = *format_ptr;
1250 for (; ISDIGIT (**format_ptr); (*format_ptr)++)
1251 continue;
1253 ch_save = **format_ptr;
1254 **format_ptr = '\0';
1255 /* In the case where no minimum field width is explicitly specified,
1256 allow for enough octal digits to represent the value of LONG_MAX. */
1257 count = ((*format_ptr == start)
1258 ? bytes_to_octal_digits[sizeof (long)]
1259 : atoi (start));
1260 **format_ptr = ch_save;
1261 return count;
/* Parse the optional precision (`.' followed by an optionally signed
   decimal number) at *FORMAT_PTR, leaving *FORMAT_PTR just past it.
   Return 0 when there is no `.'; return 11 when the precision has no
   digits or is negative; otherwise return its value.  The string is
   modified temporarily and restored.  */

static unsigned
get_format_prec (char **format_ptr)
{
  unsigned count = 0;
  char *start;
  int ch_save;
  int is_negative;

  if (**format_ptr != '.')
    return 0;
  (*format_ptr)++;

  if (**format_ptr == '-' || **format_ptr == '+')
    {
      is_negative = (**format_ptr == '-');
      (*format_ptr)++;
    }
  else
    {
      is_negative = 0;
    }

  start = *format_ptr;
  for (; ISDIGIT (**format_ptr); (*format_ptr)++)
    continue;

  /* ANSI 4.9.6.1 says that if the precision is negative, it's as good as
     not there. */
  if (is_negative)
    start = *format_ptr;

  /* Terminate the digit string in place so atoi sees only the digits.  */
  ch_save = **format_ptr;
  **format_ptr = '\0';
  count = (*format_ptr == start) ? 11 : atoi (start);
  **format_ptr = ch_save;

  return count;
}
1303 static void
1304 get_format_conv_type (char **format_ptr)
1306 int ch = *((*format_ptr)++);
1308 switch (ch)
1310 case 'd':
1311 case 'i':
1312 case 'o':
1313 case 'u':
1314 case 'x':
1315 case 'X':
1316 break;
1318 case 0:
1319 error (EXIT_FAILURE, 0, _("missing conversion specifier in suffix"));
1320 break;
1322 default:
1323 if (ISPRINT (ch))
1324 error (EXIT_FAILURE, 0,
1325 _("invalid conversion specifier in suffix: %c"), ch);
1326 else
1327 error (EXIT_FAILURE, 0,
1328 _("invalid conversion specifier in suffix: \\%.3o"), ch);
1332 static unsigned
1333 max_out (char *format)
1335 unsigned out_count = 0;
1336 unsigned percents = 0;
1338 for (; *format; )
1340 int ch = *format++;
1342 if (ch != '%')
1343 out_count++;
1344 else
1346 percents++;
1347 out_count += get_format_flags (&format);
1349 int width = get_format_width (&format);
1350 int prec = get_format_prec (&format);
1352 out_count += MAX (width, prec);
1354 get_format_conv_type (&format);
1358 if (percents == 0)
1359 error (EXIT_FAILURE, 0,
1360 _("missing %% conversion specification in suffix"));
1361 else if (percents > 1)
1362 error (EXIT_FAILURE, 0,
1363 _("too many %% conversion specifications in suffix"));
1365 return out_count;
1369 main (int argc, char **argv)
1371 int optc;
1372 unsigned long val;
1373 #ifdef SA_INTERRUPT
1374 struct sigaction oldact, newact;
1375 #endif
1377 program_name = argv[0];
1378 setlocale (LC_ALL, "");
1379 bindtextdomain (PACKAGE, LOCALEDIR);
1380 textdomain (PACKAGE);
1382 global_argv = argv;
1383 controls = NULL;
1384 control_used = 0;
1385 suppress_count = FALSE;
1386 remove_files = TRUE;
1387 prefix = DEFAULT_PREFIX;
1389 /* Change the way xmalloc and xrealloc fail. */
1390 xalloc_fail_func = cleanup;
1392 #ifdef SA_INTERRUPT
1393 newact.sa_handler = interrupt_handler;
1394 sigemptyset (&newact.sa_mask);
1395 newact.sa_flags = 0;
1397 sigaction (SIGHUP, NULL, &oldact);
1398 if (oldact.sa_handler != SIG_IGN)
1399 sigaction (SIGHUP, &newact, NULL);
1401 sigaction (SIGINT, NULL, &oldact);
1402 if (oldact.sa_handler != SIG_IGN)
1403 sigaction (SIGINT, &newact, NULL);
1405 sigaction (SIGQUIT, NULL, &oldact);
1406 if (oldact.sa_handler != SIG_IGN)
1407 sigaction (SIGQUIT, &newact, NULL);
1409 sigaction (SIGTERM, NULL, &oldact);
1410 if (oldact.sa_handler != SIG_IGN)
1411 sigaction (SIGTERM, &newact, NULL);
1412 #else /* not SA_INTERRUPT */
1413 if (signal (SIGHUP, SIG_IGN) != SIG_IGN)
1414 signal (SIGHUP, interrupt_handler);
1415 if (signal (SIGINT, SIG_IGN) != SIG_IGN)
1416 signal (SIGINT, interrupt_handler);
1417 if (signal (SIGQUIT, SIG_IGN) != SIG_IGN)
1418 signal (SIGQUIT, interrupt_handler);
1419 if (signal (SIGTERM, SIG_IGN) != SIG_IGN)
1420 signal (SIGTERM, interrupt_handler);
1421 #endif /* not SA_INTERRUPT */
1423 while ((optc = getopt_long (argc, argv, "f:b:kn:sqz", longopts, NULL)) != -1)
1424 switch (optc)
1426 case 0:
1427 break;
1429 case 'f':
1430 prefix = optarg;
1431 break;
1433 case 'b':
1434 suffix = optarg;
1435 break;
1437 case 'k':
1438 remove_files = FALSE;
1439 break;
1441 case 'n':
1442 if (xstrtoul (optarg, NULL, 10, &val, "") != LONGINT_OK
1443 || val > INT_MAX)
1444 error (EXIT_FAILURE, 0, _("%s: invalid number"), optarg);
1445 digits = (int) val;
1446 break;
1448 case 's':
1449 case 'q':
1450 suppress_count = TRUE;
1451 break;
1453 case 'z':
1454 elide_empty_files = TRUE;
1455 break;
1457 default:
1458 usage (1);
1461 if (show_version)
1463 printf ("csplit (%s) %s\n", GNU_PACKAGE, VERSION);
1464 exit (EXIT_SUCCESS);
1467 if (show_help)
1468 usage (0);
1470 if (argc - optind < 2)
1472 error (0, 0, _("too few arguments"));
1473 usage (1);
1476 if (suffix)
1477 filename_space = (char *) xmalloc (strlen (prefix) + max_out (suffix) + 2);
1478 else
1479 filename_space = (char *) xmalloc (strlen (prefix) + digits + 2);
1481 set_input_file (argv[optind++]);
1483 parse_patterns (argc, optind, argv);
1485 split_file ();
1487 if (close (input_desc) < 0)
1489 error (0, errno, _("read error"));
1490 cleanup_fatal ();
1493 if (!suppress_count && (ferror (stdout) || fclose (stdout) == EOF))
1494 error (EXIT_FAILURE, errno, _("write error"));
1496 exit (EXIT_SUCCESS);
/* Print a usage message and exit.
   If STATUS is nonzero, print a one-line hint on stderr that points at
   `--help'.  If STATUS is zero, print the full help text on stdout.
   In both cases the process then exits: with EXIT_SUCCESS when STATUS
   is zero, with EXIT_FAILURE otherwise.  */
1499 static void
1500 usage (int status)
/* Nonzero STATUS: the caller is reporting a command-line error, so only
   the short hint is printed.  */
1502 if (status != 0)
1503 fprintf (stderr, _("Try `%s --help' for more information.\n"),
1504 program_name);
1505 else
/* The help text is itself the printf format string, which is why a
   literal `%' is written as `%%' in it.  */
1507 printf (_("\
1508 Usage: %s [OPTION]... FILE PATTERN...\n\
1510 program_name);
1511 printf (_("\
1512 Output pieces of FILE separated by PATTERN(s) to files `xx01', `xx02', ...,\n\
1513 and output byte counts of each piece to standard output.\n\
1515 -b, --suffix-format=FORMAT use sprintf FORMAT instead of %%d\n\
1516 -f, --prefix=PREFIX use PREFIX instead of `xx'\n\
1517 -k, --keep-files do not remove output files on errors\n\
1518 -n, --digits=DIGITS use specified number of digits instead of 2\n\
1519 -s, --quiet, --silent do not print counts of output file sizes\n\
1520 -z, --elide-empty-files remove empty output files\n\
1521 --help display this help and exit\n\
1522 --version output version information and exit\n\
1524 Read standard input if FILE is -. Each PATTERN may be:\n\
1526 INTEGER copy up to but not including specified line number\n\
1527 /REGEXP/[OFFSET] copy up to but not including a matching line\n\
1528 %%REGEXP%%[OFFSET] skip to, but not including a matching line\n\
1529 {INTEGER} repeat the previous pattern specified number of times\n\
1530 {*} repeat the previous pattern as many times as possible\n\
1532 A line OFFSET is a required `+' or `-' followed by a positive integer.\n\
1533 "));
1534 puts (_("\nReport bugs to <bug-textutils@gnu.org>."));
/* Map STATUS onto the two standard exit codes.  */
1536 exit (status == 0 ? EXIT_SUCCESS : EXIT_FAILURE);