(main): Declare to be of type int, not void.
[coreutils.git] / src / csplit.c
blob8ff468b6f7c0de3b320c8f975974d69488c00c11
1 /* csplit - split a file into sections determined by context lines
2 Copyright (C) 1991, 1995 Free Software Foundation, Inc.
4 This program is free software; you can redistribute it and/or modify
5 it under the terms of the GNU General Public License as published by
6 the Free Software Foundation; either version 2, or (at your option)
7 any later version.
9 This program is distributed in the hope that it will be useful,
10 but WITHOUT ANY WARRANTY; without even the implied warranty of
11 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 GNU General Public License for more details.
14 You should have received a copy of the GNU General Public License
15 along with this program; if not, write to the Free Software
16 Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. */
18 /* Written by Stuart Kemp, cpsrk@groper.jcu.edu.au.
19 Modified by David MacKenzie, djm@gnu.ai.mit.edu. */
21 #include <config.h>
23 #include <stdio.h>
24 #include <getopt.h>
25 #include <sys/types.h>
26 #include <signal.h>
27 #ifdef HAVE_LIMITS_H
28 #include <limits.h>
29 #endif /* HAVE_LIMITS_H */
31 #ifndef UINT_MAX
32 # define UINT_MAX ((unsigned int) ~(unsigned int) 0)
33 #endif
35 #ifndef INT_MAX
36 # define INT_MAX ((int) (UINT_MAX >> 1))
37 #endif
39 #if WITH_REGEX
40 # include <regex.h>
41 #else
42 # include <rx.h>
43 #endif
44 #include "system.h"
45 #include "error.h"
46 #include "xstrtoul.h"
48 #ifdef STDC_HEADERS
49 #include <stdlib.h>
50 #else
51 char *malloc ();
52 char *realloc ();
53 #endif
55 #ifndef MAX
56 #define MAX(a,b) (((a) > (b)) ? (a) : (b))
57 #endif
59 #ifndef TRUE
60 #define FALSE 0
61 #define TRUE 1
62 #endif
64 /* Increment size of area for control records. */
65 #define ALLOC_SIZE 20
67 /* The default prefix for output file names. */
68 #define DEFAULT_PREFIX "xx"
70 typedef int boolean;
72 /* A compiled pattern arg. */
73 struct control
75 char *regexpr; /* Non-compiled regular expression. */
76 struct re_pattern_buffer re_compiled; /* Compiled regular expression. */
77 int offset; /* Offset from regexp to split at. */
78 int lines_required; /* Number of lines required. */
79 unsigned int repeat; /* Repeat count. */
80 int repeat_forever; /* Non-zero if `*' used as a repeat count. */
81 int argnum; /* ARGV index. */
82 boolean ignore; /* If true, produce no output (for regexp). */
/* Buffer tuning parameters.  Exactly one set is defined so that a
   DEBUG build does not redefine the macros with different values.  */
#ifdef DEBUG
/* Some small values to test the algorithms. */
# define START_SIZE 200
# define INCR_SIZE 10
# define CTRL_SIZE 1
#else
/* Initial size of data area in buffers. */
# define START_SIZE 8191

/* Increment size for data area. */
# define INCR_SIZE 2048

/* Number of lines kept in each node in line list. */
# define CTRL_SIZE 80
#endif
/* A counted string: LEN bytes starting at STR.  The line scanner stores
   pointers into the shared data area here, so STR is not necessarily
   NUL-terminated.  */
struct cstring
{
  int len;                      /* Number of bytes in the string. */
  char *str;                    /* First byte of the string. */
};
108 /* Pointers to the beginnings of lines in the buffer area.
109 These structures are linked together if needed. */
110 struct line
112 unsigned used; /* Number of offsets used in this struct. */
113 unsigned insert_index; /* Next offset to use when inserting line. */
114 unsigned retrieve_index; /* Next index to use when retrieving line. */
115 struct cstring starts[CTRL_SIZE]; /* Lines in the data area. */
116 struct line *next; /* Next in linked list. */
/* One block of input.  BUFFER holds the raw bytes; the list headed by
   LINE_START describes where each complete line inside it begins.
   Records are chained through NEXT, either on the list of buffers that
   still hold unprocessed lines or on the free list.  */
struct buffer_record
{
  unsigned bytes_alloc;         /* Capacity of BUFFER. */
  unsigned bytes_used;          /* Bytes of BUFFER holding data. */
  unsigned start_line;          /* Number of the first line stored here. */
  unsigned first_available;     /* Number of the next line to hand out. */
  unsigned num_lines;           /* Count of lines recorded for this buffer. */
  char *buffer;                 /* The data area itself. */
  struct line *line_start;      /* First node of the line descriptor list. */
  struct line *curr_line;       /* Node currently being filled or drained. */
  struct buffer_record *next;   /* Following record in whichever list. */
};
135 int safe_read ();
137 static void close_output_file __P ((void));
138 static void create_output_file __P ((void));
139 static void delete_all_files __P ((void));
140 static void save_line_to_file __P ((const struct cstring *line));
141 static void usage __P ((int status));
143 /* The name this program was run with. */
144 char *program_name;
146 /* Convert the number of 8-bit bytes of a binary representation to
147 the number of characters required to represent the same quantity
148 as an unsigned octal. For example, a 32-bit (4-byte) quantity may
149 require a field width as wide as 11 characters. */
150 static const unsigned int bytes_to_octal_digits[] =
151 {0, 3, 6, 8, 11, 14, 16, 19, 22, 25, 27, 30, 32, 35, 38, 41, 43};
153 /* Input file descriptor. */
154 static int input_desc = 0;
156 /* List of available buffers. */
157 static struct buffer_record *free_list = NULL;
159 /* Start of buffer list. */
160 static struct buffer_record *head = NULL;
162 /* Partially read line. */
163 static char *hold_area = NULL;
165 /* Number of chars in `hold_area'. */
166 static unsigned hold_count = 0;
168 /* Number of the last line in the buffers. */
169 static unsigned last_line_number = 0;
171 /* Number of the line currently being examined. */
172 static unsigned current_line = 0;
174 /* If TRUE, we have read EOF. */
175 static boolean have_read_eof = FALSE;
177 /* Name of output files. */
178 static char *filename_space = NULL;
180 /* Prefix part of output file names. */
181 static char *prefix = NULL;
183 /* Suffix part of output file names. */
184 static char *suffix = NULL;
186 /* Number of digits to use in output file names. */
187 static int digits = 2;
189 /* Number of files created so far. */
190 static unsigned int files_created = 0;
192 /* Number of bytes written to current file. */
193 static unsigned int bytes_written;
195 /* Output file pointer. */
196 static FILE *output_stream = NULL;
198 /* Output file name. */
199 static char *output_filename = NULL;
201 /* Perhaps it would be cleaner to pass arg values instead of indexes. */
202 static char **global_argv;
204 /* If TRUE, do not print the count of bytes in each output file. */
205 static boolean suppress_count;
207 /* If TRUE, remove output files on error. */
208 static boolean remove_files;
210 /* If TRUE, remove all output files which have a zero length. */
211 static boolean elide_empty_files;
213 /* The compiled pattern arguments, which determine how to split
214 the input file. */
215 static struct control *controls;
217 /* Number of elements in `controls'. */
218 static unsigned int control_used;
220 /* If nonzero, display usage information and exit. */
221 static int show_help;
223 /* If nonzero, print the version on standard output then exit. */
224 static int show_version;
226 static struct option const longopts[] =
228 {"digits", required_argument, NULL, 'n'},
229 {"quiet", no_argument, NULL, 'q'},
230 {"silent", no_argument, NULL, 's'},
231 {"keep-files", no_argument, NULL, 'k'},
232 {"elide-empty-files", no_argument, NULL, 'z'},
233 {"prefix", required_argument, NULL, 'f'},
234 {"suffix-format", required_argument, NULL, 'b'},
235 {"help", no_argument, &show_help, 1},
236 {"version", no_argument, &show_version, 1},
237 {NULL, 0, NULL, 0}
240 /* Optionally remove files created so far; then exit.
241 Called when an error detected. */
243 static void
244 cleanup (void)
246 if (output_stream)
247 close_output_file ();
249 if (remove_files)
250 delete_all_files ();
/* Run cleanup and terminate the program with exit status 1.
   Does not return.  */

static void
cleanup_fatal (void)
{
  cleanup ();
  exit (1);
}
260 static RETSIGTYPE
261 interrupt_handler (int sig)
263 #ifdef SA_INTERRUPT
264 struct sigaction sigact;
266 sigact.sa_handler = SIG_DFL;
267 sigemptyset (&sigact.sa_mask);
268 sigact.sa_flags = 0;
269 sigaction (sig, &sigact, NULL);
270 #else /* !SA_INTERRUPT */
271 signal (sig, SIG_DFL);
272 #endif /* SA_INTERRUPT */
273 cleanup ();
274 kill (getpid (), sig);
/* Allocate N bytes of memory dynamically, with error checking.
   On failure, print a diagnostic and call cleanup_fatal, which exits.

   A request for zero bytes is rounded up to one byte: malloc (0) is
   allowed to return NULL, which would otherwise be misreported as
   memory exhaustion (extract_regexp asks for 0 bytes when given an
   empty regular expression).  */

static char *
xmalloc (unsigned int n)
{
  char *p;

  p = malloc (n ? n : 1);
  if (p == NULL)
    {
      error (0, 0, _("virtual memory exhausted"));
      cleanup_fatal ();
    }
  return p;
}
/* Resize the allocated block P to N bytes, with error checking.
   A NULL P is handed to xmalloc.  If N is 0, P is freed and NULL is
   returned.  On allocation failure a diagnostic is printed and
   cleanup_fatal is called, which exits.  */

static char *
xrealloc (char *p, unsigned int n)
{
  char *resized;

  if (p == NULL)
    return xmalloc (n);

  if (n == 0)
    {
      free (p);
      return 0;
    }

  resized = realloc (p, n);
  if (resized == NULL)
    {
      error (0, 0, _("virtual memory exhausted"));
      cleanup_fatal ();
    }
  return resized;
}
317 /* Keep track of NUM chars of a partial line in buffer START.
318 These chars will be retrieved later when another large buffer is read.
319 It is not necessary to create a new buffer for these chars; instead,
320 we keep a pointer to the existing buffer. This buffer *is* on the
321 free list, and when the next buffer is obtained from this list
322 (even if it is this one), these chars will be placed at the
323 start of the new buffer. */
325 static void
326 save_to_hold_area (char *start, unsigned int num)
328 hold_area = start;
329 hold_count = num;
332 /* Read up to MAX_N_BYTES chars from the input stream into DEST.
333 Return the number of chars read. */
335 static int
336 read_input (char *dest, unsigned int max_n_bytes)
338 int bytes_read;
340 if (max_n_bytes == 0)
341 return 0;
343 bytes_read = safe_read (input_desc, dest, max_n_bytes);
345 if (bytes_read == 0)
346 have_read_eof = TRUE;
348 if (bytes_read < 0)
350 error (0, errno, _("read error"));
351 cleanup_fatal ();
354 return bytes_read;
357 /* Initialize existing line record P. */
359 static void
360 clear_line_control (struct line *p)
362 p->used = 0;
363 p->insert_index = 0;
364 p->retrieve_index = 0;
367 /* Initialize all line records in B. */
369 static void
370 clear_all_line_control (struct buffer_record *b)
372 struct line *l;
374 for (l = b->line_start; l; l = l->next)
375 clear_line_control (l);
378 /* Return a new, initialized line record. */
380 static struct line *
381 new_line_control (void)
383 struct line *p;
385 p = (struct line *) xmalloc (sizeof (struct line));
387 p->next = NULL;
388 clear_line_control (p);
390 return p;
393 /* Record LINE_START, which is the address of the start of a line
394 of length LINE_LEN in the large buffer, in the lines buffer of B. */
396 static void
397 keep_new_line (struct buffer_record *b, char *line_start, int line_len)
399 struct line *l;
401 /* If there is no existing area to keep line info, get some. */
402 if (b->line_start == NULL)
403 b->line_start = b->curr_line = new_line_control ();
405 /* If existing area for lines is full, get more. */
406 if (b->curr_line->used == CTRL_SIZE)
408 b->curr_line->next = new_line_control ();
409 b->curr_line = b->curr_line->next;
412 l = b->curr_line;
414 /* Record the start of the line, and update counters. */
415 l->starts[l->insert_index].str = line_start;
416 l->starts[l->insert_index].len = line_len;
417 l->used++;
418 l->insert_index++;
421 /* Scan the buffer in B for newline characters
422 and record the line start locations and lengths in B.
423 Return the number of lines found in this buffer.
425 There may be an incomplete line at the end of the buffer;
426 a pointer is kept to this area, which will be used when
427 the next buffer is filled. */
429 static unsigned int
430 record_line_starts (struct buffer_record *b)
432 char *line_start; /* Start of current line. */
433 char *line_end; /* End of each line found. */
434 unsigned int bytes_left; /* Length of incomplete last line. */
435 unsigned int lines; /* Number of lines found. */
436 unsigned int line_length; /* Length of each line found. */
438 if (b->bytes_used == 0)
439 return 0;
441 lines = 0;
442 line_start = b->buffer;
443 bytes_left = b->bytes_used;
445 for (;;)
447 line_end = memchr (line_start, '\n', bytes_left);
448 if (line_end == NULL)
449 break;
450 line_length = line_end - line_start + 1;
451 keep_new_line (b, line_start, line_length);
452 bytes_left -= line_length;
453 line_start = line_end + 1;
454 lines++;
457 /* Check for an incomplete last line. */
458 if (bytes_left)
460 if (have_read_eof)
462 keep_new_line (b, line_start, bytes_left);
463 lines++;
465 else
466 save_to_hold_area (line_start, bytes_left);
469 b->num_lines = lines;
470 b->first_available = b->start_line = last_line_number + 1;
471 last_line_number += lines;
473 return lines;
476 /* Return a new buffer with room to store SIZE bytes, plus
477 an extra byte for safety. */
479 static struct buffer_record *
480 create_new_buffer (unsigned int size)
482 struct buffer_record *new_buffer;
484 new_buffer = (struct buffer_record *)
485 xmalloc (sizeof (struct buffer_record));
487 new_buffer->buffer = (char *) xmalloc (size + 1);
489 new_buffer->bytes_alloc = size;
490 new_buffer->line_start = new_buffer->curr_line = NULL;
492 return new_buffer;
495 /* Return a new buffer of at least MINSIZE bytes. If a buffer of at
496 least that size is currently free, use it, otherwise create a new one. */
498 static struct buffer_record *
499 get_new_buffer (unsigned int min_size)
501 struct buffer_record *p, *q;
502 struct buffer_record *new_buffer; /* Buffer to return. */
503 unsigned int alloc_size; /* Actual size that will be requested. */
505 alloc_size = START_SIZE;
506 while (min_size > alloc_size)
507 alloc_size += INCR_SIZE;
509 if (free_list == NULL)
510 new_buffer = create_new_buffer (alloc_size);
511 else
513 /* Use first-fit to find a buffer. */
514 p = new_buffer = NULL;
515 q = free_list;
519 if (q->bytes_alloc >= min_size)
521 if (p == NULL)
522 free_list = q->next;
523 else
524 p->next = q->next;
525 break;
527 p = q;
528 q = q->next;
530 while (q);
532 new_buffer = (q ? q : create_new_buffer (alloc_size));
534 new_buffer->curr_line = new_buffer->line_start;
535 clear_all_line_control (new_buffer);
538 new_buffer->num_lines = 0;
539 new_buffer->bytes_used = 0;
540 new_buffer->start_line = new_buffer->first_available = last_line_number + 1;
541 new_buffer->next = NULL;
543 return new_buffer;
546 /* Add buffer BUF to the list of free buffers. */
548 static void
549 free_buffer (struct buffer_record *buf)
551 buf->next = free_list;
552 free_list = buf;
555 /* Append buffer BUF to the linked list of buffers that contain
556 some data yet to be processed. */
558 static void
559 save_buffer (struct buffer_record *buf)
561 struct buffer_record *p;
563 buf->next = NULL;
564 buf->curr_line = buf->line_start;
566 if (head == NULL)
567 head = buf;
568 else
570 for (p = head; p->next; p = p->next)
571 /* Do nothing. */ ;
572 p->next = buf;
576 /* Fill a buffer of input.
578 Set the initial size of the buffer to a default.
579 Fill the buffer (from the hold area and input stream)
580 and find the individual lines.
581 If no lines are found (the buffer is too small to hold the next line),
582 release the current buffer (whose contents would have been put in the
583 hold area) and repeat the process with another large buffer until at least
584 one entire line has been read.
586 Return TRUE if a new buffer was obtained, otherwise false
587 (in which case end-of-file must have been encountered). */
589 static boolean
590 load_buffer (void)
592 struct buffer_record *b;
593 unsigned int bytes_wanted = START_SIZE; /* Minimum buffer size. */
594 unsigned int bytes_avail; /* Size of new buffer created. */
595 unsigned int lines_found; /* Number of lines in this new buffer. */
596 char *p; /* Place to load into buffer. */
598 if (have_read_eof)
599 return FALSE;
601 /* We must make the buffer at least as large as the amount of data
602 in the partial line left over from the last call. */
603 if (bytes_wanted < hold_count)
604 bytes_wanted = hold_count;
608 b = get_new_buffer (bytes_wanted);
609 bytes_avail = b->bytes_alloc; /* Size of buffer returned. */
610 p = b->buffer;
612 /* First check the `holding' area for a partial line. */
613 if (hold_count)
615 if (p != hold_area)
616 memcpy (p, hold_area, hold_count);
617 p += hold_count;
618 b->bytes_used += hold_count;
619 bytes_avail -= hold_count;
620 hold_count = 0;
623 b->bytes_used += (unsigned int) read_input (p, bytes_avail);
625 lines_found = record_line_starts (b);
626 bytes_wanted = b->bytes_alloc * 2;
627 if (!lines_found)
628 free_buffer (b);
630 while (!lines_found && !have_read_eof);
632 if (lines_found)
633 save_buffer (b);
635 return lines_found != 0;
638 /* Return the line number of the first line that has not yet been retrieved. */
640 static unsigned int
641 get_first_line_in_buffer (void)
643 if (head == NULL && !load_buffer ())
644 error (1, errno, _("input disappeared"));
646 return head->first_available;
649 /* Return a pointer to the logical first line in the buffer and make the
650 next line the logical first line.
651 Return NULL if there is no more input. */
653 static struct cstring *
654 remove_line (void)
656 struct cstring *line; /* Return value. */
657 struct line *l; /* For convenience. */
659 if (head == NULL && !load_buffer ())
660 return NULL;
662 if (current_line < head->first_available)
663 current_line = head->first_available;
665 ++(head->first_available);
667 l = head->curr_line;
669 line = &l->starts[l->retrieve_index];
671 /* Advance index to next line. */
672 if (++l->retrieve_index == l->used)
674 /* Go on to the next line record. */
675 head->curr_line = l->next;
676 if (head->curr_line == NULL || head->curr_line->used == 0)
678 /* Go on to the next data block. */
679 struct buffer_record *b = head;
680 head = head->next;
681 free_buffer (b);
685 return line;
688 /* Search the buffers for line LINENUM, reading more input if necessary.
689 Return a pointer to the line, or NULL if it is not found in the file. */
691 static struct cstring *
692 find_line (unsigned int linenum)
694 struct buffer_record *b;
696 if (head == NULL && !load_buffer ())
697 return NULL;
699 if (linenum < head->start_line)
700 return NULL;
702 for (b = head;;)
704 if (linenum < b->start_line + b->num_lines)
706 /* The line is in this buffer. */
707 struct line *l;
708 unsigned int offset; /* How far into the buffer the line is. */
710 l = b->line_start;
711 offset = linenum - b->start_line;
712 /* Find the control record. */
713 while (offset >= CTRL_SIZE)
715 l = l->next;
716 offset -= CTRL_SIZE;
718 return &l->starts[offset];
720 if (b->next == NULL && !load_buffer ())
721 return NULL;
722 b = b->next; /* Try the next data block. */
726 /* Return TRUE if at least one more line is available for input. */
728 static boolean
729 no_more_lines (void)
731 return (find_line (current_line + 1) == NULL) ? TRUE : FALSE;
734 /* Set the name of the input file to NAME and open it. */
736 static void
737 set_input_file (const char *name)
739 if (!strcmp (name, "-"))
740 input_desc = 0;
741 else
743 input_desc = open (name, O_RDONLY);
744 if (input_desc < 0)
745 error (1, errno, "%s", name);
749 /* Write all lines from the beginning of the buffer up to, but
750 not including, line LAST_LINE, to the current output file.
751 If IGNORE is TRUE, do not output lines selected here.
752 ARGNUM is the index in ARGV of the current pattern. */
754 static void
755 write_to_file (unsigned int last_line, boolean ignore, int argnum)
757 struct cstring *line;
758 unsigned int first_line; /* First available input line. */
759 unsigned int lines; /* Number of lines to output. */
760 unsigned int i;
762 first_line = get_first_line_in_buffer ();
764 if (first_line > last_line)
766 error (0, 0, _("%s: line number out of range"), global_argv[argnum]);
767 cleanup_fatal ();
770 lines = last_line - first_line;
772 for (i = 0; i < lines; i++)
774 line = remove_line ();
775 if (line == NULL)
777 error (0, 0, _("%s: line number out of range"), global_argv[argnum]);
778 cleanup_fatal ();
780 if (!ignore)
781 save_line_to_file (line);
/* Write every remaining input line to the current output file.  */

static void
dump_rest_of_file (void)
{
  struct cstring *line;

  for (line = remove_line (); line != NULL; line = remove_line ())
    save_line_to_file (line);
}
796 /* Handle an attempt to read beyond EOF under the control of record P,
797 on iteration REPETITION if nonzero. */
799 static void
800 handle_line_error (const struct control *p, int repetition)
802 fprintf (stderr, _("%s: `%d': line number out of range"),
803 program_name, p->lines_required);
804 if (repetition)
805 fprintf (stderr, _(" on repetition %d\n"), repetition);
806 else
807 fprintf (stderr, "\n");
809 cleanup_fatal ();
812 /* Determine the line number that marks the end of this file,
813 then get those lines and save them to the output file.
814 P is the control record.
815 REPETITION is the repetition number. */
817 static void
818 process_line_count (const struct control *p, int repetition)
820 unsigned int linenum;
821 unsigned int last_line_to_save = p->lines_required * (repetition + 1);
822 struct cstring *line;
824 create_output_file ();
826 linenum = get_first_line_in_buffer ();
828 /* Check for requesting a line that has already been written out.
829 If this ever happens, it's due to a bug in csplit. */
830 if (linenum >= last_line_to_save)
831 abort ();
833 while (linenum++ < last_line_to_save)
835 line = remove_line ();
836 if (line == NULL)
837 handle_line_error (p, repetition);
838 save_line_to_file (line);
841 close_output_file ();
843 /* Ensure that the line number specified is not 1 greater than
844 the number of lines in the file. */
845 if (no_more_lines ())
846 handle_line_error (p, repetition);
849 static void
850 regexp_error (struct control *p, int repetition, boolean ignore)
852 fprintf (stderr, _("%s: `%s': match not found"),
853 program_name, global_argv[p->argnum]);
855 if (repetition)
856 fprintf (stderr, _(" on repetition %d\n"), repetition);
857 else
858 fprintf (stderr, "\n");
860 if (!ignore)
862 dump_rest_of_file ();
863 close_output_file ();
865 cleanup_fatal ();
868 /* Read the input until a line matches the regexp in P, outputting
869 it unless P->IGNORE is TRUE.
870 REPETITION is this repeat-count; 0 means the first time. */
872 static void
873 process_regexp (struct control *p, int repetition)
875 struct cstring *line; /* From input file. */
876 unsigned int line_len; /* To make "$" in regexps work. */
877 unsigned int break_line; /* First line number of next file. */
878 boolean ignore = p->ignore; /* If TRUE, skip this section. */
879 int ret;
881 if (!ignore)
882 create_output_file ();
884 /* If there is no offset for the regular expression, or
885 it is positive, then it is not necessary to buffer the lines. */
887 if (p->offset >= 0)
889 for (;;)
891 line = find_line (++current_line);
892 if (line == NULL)
894 if (p->repeat_forever)
896 if (!ignore)
898 dump_rest_of_file ();
899 close_output_file ();
901 exit (0);
903 else
904 regexp_error (p, repetition, ignore);
906 line_len = line->len;
907 if (line->str[line_len - 1] == '\n')
908 line_len--;
909 ret = re_search (&p->re_compiled, line->str, line_len,
910 0, line_len, (struct re_registers *) 0);
911 if (ret == -2)
913 error (0, 0, _("error in regular expression search"));
914 cleanup_fatal ();
916 if (ret == -1)
918 line = remove_line ();
919 if (!ignore)
920 save_line_to_file (line);
922 else
923 break;
926 else
928 /* Buffer the lines. */
929 for (;;)
931 line = find_line (++current_line);
932 if (line == NULL)
934 if (p->repeat_forever)
936 if (!ignore)
938 dump_rest_of_file ();
939 close_output_file ();
941 exit (0);
943 else
944 regexp_error (p, repetition, ignore);
946 line_len = line->len;
947 if (line->str[line_len - 1] == '\n')
948 line_len--;
949 ret = re_search (&p->re_compiled, line->str, line_len,
950 0, line_len, (struct re_registers *) 0);
951 if (ret == -2)
953 error (0, 0, _("error in regular expression search"));
954 cleanup_fatal ();
956 if (ret >= 0)
957 break;
961 /* Account for any offset from this regexp. */
962 break_line = current_line + p->offset;
964 write_to_file (break_line, ignore, p->argnum);
966 if (!ignore)
967 close_output_file ();
969 current_line = break_line;
972 /* Split the input file according to the control records we have built. */
974 static void
975 split_file (void)
977 unsigned int i, j;
979 for (i = 0; i < control_used; i++)
981 if (controls[i].regexpr)
983 for (j = 0; (controls[i].repeat_forever
984 || j <= controls[i].repeat); j++)
985 process_regexp (&controls[i], j);
987 else
989 for (j = 0; (controls[i].repeat_forever
990 || j <= controls[i].repeat); j++)
991 process_line_count (&controls[i], j);
995 create_output_file ();
996 dump_rest_of_file ();
997 close_output_file ();
1000 /* Return the name of output file number NUM. */
1002 static char *
1003 make_filename (unsigned int num)
1005 strcpy (filename_space, prefix);
1006 if (suffix)
1007 sprintf (filename_space+strlen(prefix), suffix, num);
1008 else
1009 sprintf (filename_space+strlen(prefix), "%0*d", digits, num);
1010 return filename_space;
1013 /* Create the next output file. */
1015 static void
1016 create_output_file (void)
1018 output_filename = make_filename (files_created);
1019 output_stream = fopen (output_filename, "w");
1020 if (output_stream == NULL)
1022 error (0, errno, "%s", output_filename);
1023 cleanup_fatal ();
1025 files_created++;
1026 bytes_written = 0;
1029 /* Delete all the files we have created. */
1031 static void
1032 delete_all_files (void)
1034 unsigned int i;
1035 char *name;
1037 for (i = 0; i < files_created; i++)
1039 name = make_filename (i);
1040 if (unlink (name))
1041 error (0, errno, "%s", name);
1045 /* Close the current output file and print the count
1046 of characters in this file. */
1048 static void
1049 close_output_file (void)
1051 if (output_stream)
1053 if (fclose (output_stream) == EOF)
1055 error (0, errno, _("write error for `%s'"), output_filename);
1056 output_stream = NULL;
1057 cleanup_fatal ();
1059 if (bytes_written == 0 && elide_empty_files)
1061 if (unlink (output_filename))
1062 error (0, errno, "%s", output_filename);
1063 files_created--;
1065 else
1066 if (!suppress_count)
1067 fprintf (stdout, "%d\n", bytes_written);
1068 output_stream = NULL;
1072 /* Save line LINE to the output file and
1073 increment the character count for the current file. */
1075 static void
1076 save_line_to_file (const struct cstring *line)
1078 fwrite (line->str, sizeof (char), line->len, output_stream);
1079 bytes_written += line->len;
1082 /* Return a new, initialized control record. */
1084 static struct control *
1085 new_control_record (void)
1087 static unsigned control_allocated = 0; /* Total space allocated. */
1088 struct control *p;
1090 if (control_allocated == 0)
1092 control_allocated = ALLOC_SIZE;
1093 controls = (struct control *)
1094 xmalloc (sizeof (struct control) * control_allocated);
1096 else if (control_used == control_allocated)
1098 control_allocated += ALLOC_SIZE;
1099 controls = (struct control *)
1100 xrealloc ((char *) controls,
1101 sizeof (struct control) * control_allocated);
1103 p = &controls[control_used++];
1104 p->regexpr = NULL;
1105 p->repeat = 0;
1106 p->repeat_forever = 0;
1107 p->lines_required = 0;
1108 p->offset = 0;
1109 return p;
1112 /* Check if there is a numeric offset after a regular expression.
1113 STR is the entire command line argument.
1114 P is the control record for this regular expression.
1115 NUM is the numeric part of STR. */
1117 static void
1118 check_for_offset (struct control *p, const char *str, const char *num)
1120 unsigned long val;
1122 if (*num != '-' && *num != '+')
1123 error (1, 0, _("%s: `+' or `-' expected after delimeter"), str);
1125 if (xstrtoul (num + 1, NULL, 10, &val, NULL) != LONGINT_OK
1126 || val > UINT_MAX)
1127 error (1, 0, _("%s: integer expected after `%c'"), str, *num);
1128 p->offset = (unsigned int) val;
1130 if (*num == '-')
1131 p->offset = -p->offset;
1134 /* Given that the first character of command line arg STR is '{',
1135 make sure that the rest of the string is a valid repeat count
1136 and store its value in P.
1137 ARGNUM is the ARGV index of STR. */
1139 static void
1140 parse_repeat_count (int argnum, struct control *p, char *str)
1142 unsigned long val;
1143 char *end;
1145 end = str + strlen (str) - 1;
1146 if (*end != '}')
1147 error (1, 0, _("%s: `}' is required in repeat count"), str);
1148 *end = '\0';
1150 if (str+1 == end-1 && *(str+1) == '*')
1151 p->repeat_forever = 1;
1152 else
1154 if (xstrtoul (str + 1, NULL, 10, &val, NULL) != LONGINT_OK
1155 || val > UINT_MAX)
1157 error (1, 0, _("%s}: integer required between `{' and `}'"),
1158 global_argv[argnum]);
1160 p->repeat = (unsigned int) val;
1163 *end = '}';
1166 /* Extract the regular expression from STR and check for a numeric offset.
1167 STR should start with the regexp delimiter character.
1168 Return a new control record for the regular expression.
1169 ARGNUM is the ARGV index of STR.
1170 Unless IGNORE is TRUE, mark these lines for output. */
1172 static struct control *
1173 extract_regexp (int argnum, boolean ignore, char *str)
1175 int len; /* Number of chars in this regexp. */
1176 char delim = *str;
1177 char *closing_delim;
1178 struct control *p;
1179 const char *err;
1181 closing_delim = strrchr (str + 1, delim);
1182 if (closing_delim == NULL)
1183 error (1, 0, _("%s: closing delimeter `%c' missing"), str, delim);
1185 len = closing_delim - str - 1;
1186 p = new_control_record ();
1187 p->argnum = argnum;
1188 p->ignore = ignore;
1190 p->regexpr = (char *) xmalloc ((unsigned) (len + 1));
1191 strncpy (p->regexpr, str + 1, len);
1192 p->re_compiled.allocated = len * 2;
1193 p->re_compiled.buffer = (unsigned char *) xmalloc (p->re_compiled.allocated);
1194 p->re_compiled.fastmap = xmalloc (256);
1195 p->re_compiled.translate = 0;
1196 #if !WITH_REGEX
1197 p->re_compiled.syntax_parens = 0;
1198 #endif
1199 err = re_compile_pattern (p->regexpr, len, &p->re_compiled);
1200 if (err)
1202 error (0, 0, _("%s: invalid regular expression: %s"), str, err);
1203 cleanup_fatal ();
1206 if (closing_delim[1])
1207 check_for_offset (p, str, closing_delim + 1);
1209 return p;
1212 /* Extract the break patterns from args START through ARGC - 1 of ARGV.
1213 After each pattern, check if the next argument is a repeat count. */
1215 static void
1216 parse_patterns (int argc, int start, char **argv)
1218 int i; /* Index into ARGV. */
1219 struct control *p; /* New control record created. */
1220 unsigned long val;
1222 for (i = start; i < argc; i++)
1224 if (*argv[i] == '/' || *argv[i] == '%')
1226 p = extract_regexp (i, *argv[i] == '%', argv[i]);
1228 else
1230 p = new_control_record ();
1231 p->argnum = i;
1232 if (xstrtoul (argv[i], NULL, 10, &val, NULL) != LONGINT_OK
1233 || val > INT_MAX)
1234 error (1, 0, _("%s: invalid pattern"), argv[i]);
1235 p->lines_required = (int) val;
1238 if (i + 1 < argc && *argv[i + 1] == '{')
1240 /* We have a repeat count. */
1241 i++;
1242 parse_repeat_count (i, p, argv[i]);
/* Skip the flag characters at *FORMAT_PTR, leaving *FORMAT_PTR on the
   first character that is not one of `-', `+', ` ' or `#'.
   Return the number of extra output characters those flags may
   produce: one for each `+' or ` ', two for each `#' (allowing for a
   0x prefix preceding an `x' conversion), none for `-'.  */

static unsigned
get_format_flags (char **format_ptr)
{
  unsigned extra = 0;

  for (;;)
    {
      char c = **format_ptr;

      if (c == '+' || c == ' ')
        extra += 1;
      else if (c == '#')
        extra += 2;
      else if (c != '-')
        break;                  /* Not a flag; this includes the NUL. */

      ++*format_ptr;
    }

  return extra;
}
1275 static unsigned
1276 get_format_width (char **format_ptr)
1278 unsigned count = 0;
1279 char *start;
1280 int ch_save;
1282 start = *format_ptr;
1283 for (; **format_ptr; (*format_ptr)++)
1284 if (!ISDIGIT (**format_ptr))
1285 break;
1287 ch_save = **format_ptr;
1288 **format_ptr = '\0';
1289 /* In the case where no minimum field width is explicitly specified,
1290 allow for enough octal digits to represent the value of LONG_MAX. */
1291 count = ((*format_ptr == start)
1292 ? bytes_to_octal_digits[sizeof (long)]
1293 : atoi (start));
1294 **format_ptr = ch_save;
1295 return count;
/* Parse the optional printf precision at *FORMAT_PTR: a `.' followed by
   an optional `+' or `-' and a run of digits.  Advance *FORMAT_PTR past
   whatever was consumed.  Return 0 when there is no `.', 11 when the
   digits are missing or the precision is negative, and the value of the
   digits otherwise. */

static unsigned
get_format_prec (char **format_ptr)
{
  char *digits;		/* Start of the digit string. */
  char *end;		/* First character after the digit string. */
  int saved;		/* Character temporarily replaced by NUL. */
  int negative = 0;
  unsigned prec;

  if (**format_ptr != '.')
    return 0;
  ++*format_ptr;

  switch (**format_ptr)
    {
    case '-':
      negative = 1;
      /* Fall through. */
    case '+':
      ++*format_ptr;
      break;
    }

  digits = *format_ptr;
  while (**format_ptr && ISDIGIT (**format_ptr))
    ++*format_ptr;
  end = *format_ptr;

  /* ANSI 4.9.6.1 says that if the precision is negative, it's as good as
     not there. */
  if (negative)
    digits = end;

  /* Terminate the digit string while it is converted, then restore it. */
  saved = *end;
  *end = '\0';
  if (digits == end)
    prec = 11;
  else
    prec = atoi (digits);
  *end = saved;

  return prec;
}
/* Consume the conversion character at *FORMAT_PTR and check that it is
   one of the integer conversions `d', `i', `o', `u', `x' or `X'.
   For a missing or any other conversion character, report the problem
   through error with a status of 1. */

static void
get_format_conv_type (char **format_ptr)
{
  /* Fetch the character as an unsigned char.  With a signed plain char,
     a byte with the high bit set would otherwise become a negative int,
     which is not a valid argument for a character-class test and which
     `%.3o' would print as a sign-extended value instead of three octal
     digits. */
  int ch = (unsigned char) *((*format_ptr)++);

  switch (ch)
    {
    case 'd':
    case 'i':
    case 'o':
    case 'u':
    case 'x':
    case 'X':
      break;

    case 0:
      error (1, 0, _("missing conversion specifier in suffix"));
      break;

    default:
      if (ISPRINT (ch))
        error (1, 0, _("invalid conversion specifier in suffix: %c"), ch);
      else
        error (1, 0, _("invalid conversion specifier in suffix: \\%.3o"), ch);
    }
}
/* Scan the suffix FORMAT and return the number of characters to allow
   for its expansion: one for each ordinary character plus, for a `%'
   conversion, the allowance for its flags and the larger of its width
   and precision allowances.  FORMAT must contain exactly one `%'
   conversion; otherwise an error is reported with a status of 1. */

static unsigned
max_out (char *format)
{
  unsigned total = 0;		/* Characters allowed for so far. */
  unsigned n_specs = 0;		/* Number of `%' conversions seen. */

  while (*format)
    {
      int width;
      int prec;

      if (*format++ != '%')
        {
          total++;
          continue;
        }

      n_specs++;
      total += get_format_flags (&format);
      width = get_format_width (&format);
      prec = get_format_prec (&format);
      total += MAX (width, prec);
      get_format_conv_type (&format);
    }

  if (n_specs == 0)
    error (1, 0, _("missing %% conversion specification in suffix"));
  else if (n_specs > 1)
    error (1, 0, _("too many %% conversion specifications in suffix"));

  return total;
}
1400 main (int argc, char **argv)
1402 int optc;
1403 unsigned long val;
1404 #ifdef SA_INTERRUPT
1405 struct sigaction oldact, newact;
1406 #endif
1408 program_name = argv[0];
1409 setlocale (LC_ALL, "");
1410 bindtextdomain (PACKAGE, LOCALEDIR);
1411 textdomain (PACKAGE);
1413 global_argv = argv;
1414 controls = NULL;
1415 control_used = 0;
1416 suppress_count = FALSE;
1417 remove_files = TRUE;
1418 prefix = DEFAULT_PREFIX;
1420 #ifdef SA_INTERRUPT
1421 newact.sa_handler = interrupt_handler;
1422 sigemptyset (&newact.sa_mask);
1423 newact.sa_flags = 0;
1425 sigaction (SIGHUP, NULL, &oldact);
1426 if (oldact.sa_handler != SIG_IGN)
1427 sigaction (SIGHUP, &newact, NULL);
1429 sigaction (SIGINT, NULL, &oldact);
1430 if (oldact.sa_handler != SIG_IGN)
1431 sigaction (SIGINT, &newact, NULL);
1433 sigaction (SIGQUIT, NULL, &oldact);
1434 if (oldact.sa_handler != SIG_IGN)
1435 sigaction (SIGQUIT, &newact, NULL);
1437 sigaction (SIGTERM, NULL, &oldact);
1438 if (oldact.sa_handler != SIG_IGN)
1439 sigaction (SIGTERM, &newact, NULL);
1440 #else /* not SA_INTERRUPT */
1441 if (signal (SIGHUP, SIG_IGN) != SIG_IGN)
1442 signal (SIGHUP, interrupt_handler);
1443 if (signal (SIGINT, SIG_IGN) != SIG_IGN)
1444 signal (SIGINT, interrupt_handler);
1445 if (signal (SIGQUIT, SIG_IGN) != SIG_IGN)
1446 signal (SIGQUIT, interrupt_handler);
1447 if (signal (SIGTERM, SIG_IGN) != SIG_IGN)
1448 signal (SIGTERM, interrupt_handler);
1449 #endif /* not SA_INTERRUPT */
1451 while ((optc = getopt_long (argc, argv, "f:b:kn:sqz", longopts, (int *) 0))
1452 != EOF)
1453 switch (optc)
1455 case 0:
1456 break;
1458 case 'f':
1459 prefix = optarg;
1460 break;
1462 case 'b':
1463 suffix = optarg;
1464 break;
1466 case 'k':
1467 remove_files = FALSE;
1468 break;
1470 case 'n':
1471 if (xstrtoul (optarg, NULL, 10, &val, NULL) != LONGINT_OK
1472 || val > INT_MAX)
1473 error (1, 0, _("%s: invalid number"), optarg);
1474 digits = (int) val;
1475 break;
1477 case 's':
1478 case 'q':
1479 suppress_count = TRUE;
1480 break;
1482 case 'z':
1483 elide_empty_files = TRUE;
1484 break;
1486 default:
1487 usage (1);
1490 if (show_version)
1492 printf ("csplit - %s\n", PACKAGE_VERSION);
1493 exit (0);
1496 if (show_help)
1497 usage (0);
1499 if (argc - optind < 2)
1501 error (0, 0, _("too few arguments"));
1502 usage (1);
1505 if (suffix)
1506 filename_space = (char *) xmalloc (strlen (prefix) + max_out (suffix) + 2);
1507 else
1508 filename_space = (char *) xmalloc (strlen (prefix) + digits + 2);
1510 set_input_file (argv[optind++]);
1512 parse_patterns (argc, optind, argv);
1514 split_file ();
1516 if (close (input_desc) < 0)
1518 error (0, errno, _("read error"));
1519 cleanup_fatal ();
1522 exit (0);
/* Print a usage message and exit with STATUS.
   When STATUS is nonzero, print only a one-line hint on stderr that
   points at `--help'.  When STATUS is zero, print the synopsis followed
   by the description of the options and pattern forms on stdout. */
1525 static void
1526 usage (int status)
1528 if (status != 0)
1529 fprintf (stderr, _("Try `%s --help' for more information.\n"),
1530 program_name);
1531 else
1533 printf (_("\
1534 Usage: %s [OPTION]... FILE PATTERN...\n\
1536 program_name);
1537 printf (_("\
1538 Output pieces of FILE separated by PATTERN(s) to files `xx01', `xx02', ...,\n\
1539 and output byte counts of each piece to standard output.\n\
1541 -b, --suffix-format=FORMAT use sprintf FORMAT instead of %%d\n\
1542 -f, --prefix=PREFIX use PREFIX instead of `xx'\n\
1543 -k, --keep-files do not remove output files on errors\n\
1544 -n, --digits=DIGITS use specified number of digits instead of 2\n\
1545 -s, --quiet, --silent do not print counts of output file sizes\n\
1546 -z, --elide-empty-files remove empty output files\n\
1547 --help display this help and exit\n\
1548 --version output version information and exit\n\
1550 Read standard input if FILE is -. Each PATTERN may be:\n\
1552 INTEGER copy up to but not including specified line number\n\
1553 /REGEXP/[OFFSET] copy up to but not including a matching line\n\
1554 %%REGEXP%%[OFFSET] skip to, but not including a matching line\n\
1555 {INTEGER} repeat the previous pattern specified number of times\n\
1556 {*} repeat the previous pattern as many times as possible\n\
1558 A line OFFSET is a required `+' or `-' followed by a positive integer.\n\
1559 "));
1561 exit (status);