1 /* csplit - split a file into sections determined by context lines
2 Copyright (C) 1991, 1995 Free Software Foundation, Inc.
4 This program is free software; you can redistribute it and/or modify
5 it under the terms of the GNU General Public License as published by
6 the Free Software Foundation; either version 2, or (at your option)
7 any later version.
9 This program is distributed in the hope that it will be useful,
10 but WITHOUT ANY WARRANTY; without even the implied warranty of
11 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 GNU General Public License for more details.
14 You should have received a copy of the GNU General Public License
15 along with this program; if not, write to the Free Software
16 Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. */
18 /* Written by Stuart Kemp, cpsrk@groper.jcu.edu.au.
19 Modified by David MacKenzie, djm@gnu.ai.mit.edu. */
25 #include <sys/types.h>
29 #endif /* HAVE_LIMITS_H */
32 # define UINT_MAX ((unsigned int) ~(unsigned int) 0)
36 # define INT_MAX ((int) (UINT_MAX >> 1))
56 #define MAX(a,b) (((a) > (b)) ? (a) : (b))
64 /* Increment size of area for control records. */
67 /* The default prefix for output file names. */
68 #define DEFAULT_PREFIX "xx"
72 /* A compiled pattern arg. */
75 char *regexpr
; /* Non-compiled regular expression. */
76 struct re_pattern_buffer re_compiled
; /* Compiled regular expression. */
77 int offset
; /* Offset from regexp to split at. */
78 int lines_required
; /* Number of lines required. */
79 unsigned int repeat
; /* Repeat count. */
80 int repeat_forever
; /* Non-zero if `*' used as a repeat count. */
81 int argnum
; /* ARGV index. */
82 boolean ignore
; /* If true, produce no output (for regexp). */
85 /* Initial size of data area in buffers. */
86 #define START_SIZE 8191
88 /* Increment size for data area. */
89 #define INCR_SIZE 2048
91 /* Number of lines kept in each node in line list. */
95 /* Some small values to test the algorithms. */
96 #define START_SIZE 200
101 /* A string with a length count. */
108 /* Pointers to the beginnings of lines in the buffer area.
109 These structures are linked together if needed. */
112 unsigned used
; /* Number of offsets used in this struct. */
113 unsigned insert_index
; /* Next offset to use when inserting line. */
114 unsigned retrieve_index
; /* Next index to use when retrieving line. */
115 struct cstring starts
[CTRL_SIZE
]; /* Lines in the data area. */
116 struct line
*next
; /* Next in linked list. */
119 /* The structure to hold the input lines.
120 Contains a pointer to the data area and a list containing
121 pointers to the individual lines. */
124 unsigned bytes_alloc
; /* Size of the buffer area. */
125 unsigned bytes_used
; /* Bytes used in the buffer area. */
126 unsigned start_line
; /* First line number in this buffer. */
127 unsigned first_available
; /* First line that can be retrieved. */
128 unsigned num_lines
; /* Number of complete lines in this buffer. */
129 char *buffer
; /* Data area. */
130 struct line
*line_start
; /* Head of list of pointers to lines. */
131 struct line
*curr_line
; /* The line start record currently in use. */
132 struct buffer_record
*next
;
137 static void close_output_file
__P ((void));
138 static void create_output_file
__P ((void));
139 static void delete_all_files
__P ((void));
140 static void save_line_to_file
__P ((const struct cstring
*line
));
141 static void usage
__P ((int status
));
143 /* The name this program was run with. */
146 /* Convert the number of 8-bit bytes of a binary representation to
147 the number of characters required to represent the same quantity
148 as an unsigned octal. For example, a 32-bit (4-byte) quantity may
149 require a field width as wide as 11 characters. */
150 static const unsigned int bytes_to_octal_digits
[] =
151 {0, 3, 6, 8, 11, 14, 16, 19, 22, 25, 27, 30, 32, 35, 38, 41, 43};
153 /* Input file descriptor. */
154 static int input_desc
= 0;
156 /* List of available buffers. */
157 static struct buffer_record
*free_list
= NULL
;
159 /* Start of buffer list. */
160 static struct buffer_record
*head
= NULL
;
162 /* Partially read line. */
163 static char *hold_area
= NULL
;
165 /* Number of chars in `hold_area'. */
166 static unsigned hold_count
= 0;
168 /* Number of the last line in the buffers. */
169 static unsigned last_line_number
= 0;
171 /* Number of the line currently being examined. */
172 static unsigned current_line
= 0;
174 /* If TRUE, we have read EOF. */
175 static boolean have_read_eof
= FALSE
;
177 /* Name of output files. */
178 static char *filename_space
= NULL
;
180 /* Prefix part of output file names. */
181 static char *prefix
= NULL
;
183 /* Suffix part of output file names. */
184 static char *suffix
= NULL
;
186 /* Number of digits to use in output file names. */
187 static int digits
= 2;
189 /* Number of files created so far. */
190 static unsigned int files_created
= 0;
192 /* Number of bytes written to current file. */
193 static unsigned int bytes_written
;
195 /* Output file pointer. */
196 static FILE *output_stream
= NULL
;
198 /* Output file name. */
199 static char *output_filename
= NULL
;
201 /* Perhaps it would be cleaner to pass arg values instead of indexes. */
202 static char **global_argv
;
204 /* If TRUE, do not print the count of bytes in each output file. */
205 static boolean suppress_count
;
207 /* If TRUE, remove output files on error. */
208 static boolean remove_files
;
210 /* If TRUE, remove all output files which have a zero length. */
211 static boolean elide_empty_files
;
213 /* The compiled pattern arguments, which determine how to split
214 the input file. */
215 static struct control
*controls
;
217 /* Number of elements in `controls'. */
218 static unsigned int control_used
;
220 /* If nonzero, display usage information and exit. */
221 static int show_help
;
223 /* If nonzero, print the version on standard output then exit. */
224 static int show_version
;
226 static struct option
const longopts
[] =
228 {"digits", required_argument
, NULL
, 'n'},
229 {"quiet", no_argument
, NULL
, 'q'},
230 {"silent", no_argument
, NULL
, 's'},
231 {"keep-files", no_argument
, NULL
, 'k'},
232 {"elide-empty-files", no_argument
, NULL
, 'z'},
233 {"prefix", required_argument
, NULL
, 'f'},
234 {"suffix-format", required_argument
, NULL
, 'b'},
235 {"help", no_argument
, &show_help
, 1},
236 {"version", no_argument
, &show_version
, 1},
240 /* Optionally remove files created so far; then exit.
241 Called when an error is detected. */
247 close_output_file ();
261 interrupt_handler (int sig
)
264 struct sigaction sigact
;
266 sigact
.sa_handler
= SIG_DFL
;
267 sigemptyset (&sigact
.sa_mask
);
269 sigaction (sig
, &sigact
, NULL
);
270 #else /* !SA_INTERRUPT */
271 signal (sig
, SIG_DFL
);
272 #endif /* SA_INTERRUPT */
274 kill (getpid (), sig
);
277 /* Allocate N bytes of memory dynamically, with error checking. */
280 xmalloc (unsigned int n
)
287 error (0, 0, _("virtual memory exhausted"));
293 /* Change the size of an allocated block of memory P to N bytes,
294 with error checking.
295 If P is NULL, run xmalloc.
296 If N is 0, run free and return NULL. */
299 xrealloc (char *p
, unsigned int n
)
311 error (0, 0, _("virtual memory exhausted"));
317 /* Keep track of NUM chars of a partial line in buffer START.
318 These chars will be retrieved later when another large buffer is read.
319 It is not necessary to create a new buffer for these chars; instead,
320 we keep a pointer to the existing buffer. This buffer *is* on the
321 free list, and when the next buffer is obtained from this list
322 (even if it is this one), these chars will be placed at the
323 start of the new buffer. */
326 save_to_hold_area (char *start
, unsigned int num
)
332 /* Read up to MAX_N_BYTES chars from the input stream into DEST.
333 Return the number of chars read. */
336 read_input (char *dest
, unsigned int max_n_bytes
)
340 if (max_n_bytes
== 0)
343 bytes_read
= safe_read (input_desc
, dest
, max_n_bytes
);
346 have_read_eof
= TRUE
;
350 error (0, errno
, _("read error"));
357 /* Initialize existing line record P. */
360 clear_line_control (struct line
*p
)
364 p
->retrieve_index
= 0;
367 /* Initialize all line records in B. */
370 clear_all_line_control (struct buffer_record
*b
)
374 for (l
= b
->line_start
; l
; l
= l
->next
)
375 clear_line_control (l
);
378 /* Return a new, initialized line record. */
381 new_line_control (void)
385 p
= (struct line
*) xmalloc (sizeof (struct line
));
388 clear_line_control (p
);
393 /* Record LINE_START, which is the address of the start of a line
394 of length LINE_LEN in the large buffer, in the lines buffer of B. */
397 keep_new_line (struct buffer_record
*b
, char *line_start
, int line_len
)
401 /* If there is no existing area to keep line info, get some. */
402 if (b
->line_start
== NULL
)
403 b
->line_start
= b
->curr_line
= new_line_control ();
405 /* If existing area for lines is full, get more. */
406 if (b
->curr_line
->used
== CTRL_SIZE
)
408 b
->curr_line
->next
= new_line_control ();
409 b
->curr_line
= b
->curr_line
->next
;
414 /* Record the start of the line, and update counters. */
415 l
->starts
[l
->insert_index
].str
= line_start
;
416 l
->starts
[l
->insert_index
].len
= line_len
;
421 /* Scan the buffer in B for newline characters
422 and record the line start locations and lengths in B.
423 Return the number of lines found in this buffer.
425 There may be an incomplete line at the end of the buffer;
426 a pointer is kept to this area, which will be used when
427 the next buffer is filled. */
430 record_line_starts (struct buffer_record
*b
)
432 char *line_start
; /* Start of current line. */
433 char *line_end
; /* End of each line found. */
434 unsigned int bytes_left
; /* Length of incomplete last line. */
435 unsigned int lines
; /* Number of lines found. */
436 unsigned int line_length
; /* Length of each line found. */
438 if (b
->bytes_used
== 0)
442 line_start
= b
->buffer
;
443 bytes_left
= b
->bytes_used
;
447 line_end
= memchr (line_start
, '\n', bytes_left
);
448 if (line_end
== NULL
)
450 line_length
= line_end
- line_start
+ 1;
451 keep_new_line (b
, line_start
, line_length
);
452 bytes_left
-= line_length
;
453 line_start
= line_end
+ 1;
457 /* Check for an incomplete last line. */
462 keep_new_line (b
, line_start
, bytes_left
);
466 save_to_hold_area (line_start
, bytes_left
);
469 b
->num_lines
= lines
;
470 b
->first_available
= b
->start_line
= last_line_number
+ 1;
471 last_line_number
+= lines
;
476 /* Return a new buffer with room to store SIZE bytes, plus
477 an extra byte for safety. */
479 static struct buffer_record
*
480 create_new_buffer (unsigned int size
)
482 struct buffer_record
*new_buffer
;
484 new_buffer
= (struct buffer_record
*)
485 xmalloc (sizeof (struct buffer_record
));
487 new_buffer
->buffer
= (char *) xmalloc (size
+ 1);
489 new_buffer
->bytes_alloc
= size
;
490 new_buffer
->line_start
= new_buffer
->curr_line
= NULL
;
495 /* Return a new buffer of at least MIN_SIZE bytes. If a buffer of at
496 least that size is currently free, use it, otherwise create a new one. */
498 static struct buffer_record
*
499 get_new_buffer (unsigned int min_size
)
501 struct buffer_record
*p
, *q
;
502 struct buffer_record
*new_buffer
; /* Buffer to return. */
503 unsigned int alloc_size
; /* Actual size that will be requested. */
505 alloc_size
= START_SIZE
;
506 while (min_size
> alloc_size
)
507 alloc_size
+= INCR_SIZE
;
509 if (free_list
== NULL
)
510 new_buffer
= create_new_buffer (alloc_size
);
513 /* Use first-fit to find a buffer. */
514 p
= new_buffer
= NULL
;
519 if (q
->bytes_alloc
>= min_size
)
532 new_buffer
= (q
? q
: create_new_buffer (alloc_size
));
534 new_buffer
->curr_line
= new_buffer
->line_start
;
535 clear_all_line_control (new_buffer
);
538 new_buffer
->num_lines
= 0;
539 new_buffer
->bytes_used
= 0;
540 new_buffer
->start_line
= new_buffer
->first_available
= last_line_number
+ 1;
541 new_buffer
->next
= NULL
;
546 /* Add buffer BUF to the list of free buffers. */
549 free_buffer (struct buffer_record
*buf
)
551 buf
->next
= free_list
;
555 /* Append buffer BUF to the linked list of buffers that contain
556 some data yet to be processed. */
559 save_buffer (struct buffer_record
*buf
)
561 struct buffer_record
*p
;
564 buf
->curr_line
= buf
->line_start
;
570 for (p
= head
; p
->next
; p
= p
->next
)
576 /* Fill a buffer of input.
578 Set the initial size of the buffer to a default.
579 Fill the buffer (from the hold area and input stream)
580 and find the individual lines.
581 If no lines are found (the buffer is too small to hold the next line),
582 release the current buffer (whose contents would have been put in the
583 hold area) and repeat the process with another large buffer until at least
584 one entire line has been read.
586 Return TRUE if a new buffer was obtained, otherwise false
587 (in which case end-of-file must have been encountered). */
592 struct buffer_record
*b
;
593 unsigned int bytes_wanted
= START_SIZE
; /* Minimum buffer size. */
594 unsigned int bytes_avail
; /* Size of new buffer created. */
595 unsigned int lines_found
; /* Number of lines in this new buffer. */
596 char *p
; /* Place to load into buffer. */
601 /* We must make the buffer at least as large as the amount of data
602 in the partial line left over from the last call. */
603 if (bytes_wanted
< hold_count
)
604 bytes_wanted
= hold_count
;
608 b
= get_new_buffer (bytes_wanted
);
609 bytes_avail
= b
->bytes_alloc
; /* Size of buffer returned. */
612 /* First check the `holding' area for a partial line. */
616 memcpy (p
, hold_area
, hold_count
);
618 b
->bytes_used
+= hold_count
;
619 bytes_avail
-= hold_count
;
623 b
->bytes_used
+= (unsigned int) read_input (p
, bytes_avail
);
625 lines_found
= record_line_starts (b
);
626 bytes_wanted
= b
->bytes_alloc
* 2;
630 while (!lines_found
&& !have_read_eof
);
635 return lines_found
!= 0;
638 /* Return the line number of the first line that has not yet been retrieved. */
641 get_first_line_in_buffer (void)
643 if (head
== NULL
&& !load_buffer ())
644 error (1, errno
, _("input disappeared"));
646 return head
->first_available
;
649 /* Return a pointer to the logical first line in the buffer and make the
650 next line the logical first line.
651 Return NULL if there is no more input. */
653 static struct cstring
*
656 struct cstring
*line
; /* Return value. */
657 struct line
*l
; /* For convenience. */
659 if (head
== NULL
&& !load_buffer ())
662 if (current_line
< head
->first_available
)
663 current_line
= head
->first_available
;
665 ++(head
->first_available
);
669 line
= &l
->starts
[l
->retrieve_index
];
671 /* Advance index to next line. */
672 if (++l
->retrieve_index
== l
->used
)
674 /* Go on to the next line record. */
675 head
->curr_line
= l
->next
;
676 if (head
->curr_line
== NULL
|| head
->curr_line
->used
== 0)
678 /* Go on to the next data block. */
679 struct buffer_record
*b
= head
;
688 /* Search the buffers for line LINENUM, reading more input if necessary.
689 Return a pointer to the line, or NULL if it is not found in the file. */
691 static struct cstring
*
692 find_line (unsigned int linenum
)
694 struct buffer_record
*b
;
696 if (head
== NULL
&& !load_buffer ())
699 if (linenum
< head
->start_line
)
704 if (linenum
< b
->start_line
+ b
->num_lines
)
706 /* The line is in this buffer. */
708 unsigned int offset
; /* How far into the buffer the line is. */
711 offset
= linenum
- b
->start_line
;
712 /* Find the control record. */
713 while (offset
>= CTRL_SIZE
)
718 return &l
->starts
[offset
];
720 if (b
->next
== NULL
&& !load_buffer ())
722 b
= b
->next
; /* Try the next data block. */
726 /* Return TRUE if no more lines are available for input. */
731 return (find_line (current_line
+ 1) == NULL
) ? TRUE
: FALSE
;
734 /* Set the name of the input file to NAME and open it. */
737 set_input_file (const char *name
)
739 if (!strcmp (name
, "-"))
743 input_desc
= open (name
, O_RDONLY
);
745 error (1, errno
, "%s", name
);
749 /* Write all lines from the beginning of the buffer up to, but
750 not including, line LAST_LINE, to the current output file.
751 If IGNORE is TRUE, do not output lines selected here.
752 ARGNUM is the index in ARGV of the current pattern. */
755 write_to_file (unsigned int last_line
, boolean ignore
, int argnum
)
757 struct cstring
*line
;
758 unsigned int first_line
; /* First available input line. */
759 unsigned int lines
; /* Number of lines to output. */
762 first_line
= get_first_line_in_buffer ();
764 if (first_line
> last_line
)
766 error (0, 0, _("%s: line number out of range"), global_argv
[argnum
]);
770 lines
= last_line
- first_line
;
772 for (i
= 0; i
< lines
; i
++)
774 line
= remove_line ();
777 error (0, 0, _("%s: line number out of range"), global_argv
[argnum
]);
781 save_line_to_file (line
);
785 /* Output any lines left after all regexps have been processed. */
788 dump_rest_of_file (void)
790 struct cstring
*line
;
792 while ((line
= remove_line ()) != NULL
)
793 save_line_to_file (line
);
796 /* Handle an attempt to read beyond EOF under the control of record P,
797 on iteration REPETITION if nonzero. */
800 handle_line_error (const struct control
*p
, int repetition
)
802 fprintf (stderr
, _("%s: `%d': line number out of range"),
803 program_name
, p
->lines_required
);
805 fprintf (stderr
, _(" on repetition %d\n"), repetition
);
807 fprintf (stderr
, "\n");
812 /* Determine the line number that marks the end of this file,
813 then get those lines and save them to the output file.
814 P is the control record.
815 REPETITION is the repetition number. */
818 process_line_count (const struct control
*p
, int repetition
)
820 unsigned int linenum
;
821 unsigned int last_line_to_save
= p
->lines_required
* (repetition
+ 1);
822 struct cstring
*line
;
824 create_output_file ();
826 linenum
= get_first_line_in_buffer ();
828 /* Check for requesting a line that has already been written out.
829 If this ever happens, it's due to a bug in csplit. */
830 if (linenum
>= last_line_to_save
)
833 while (linenum
++ < last_line_to_save
)
835 line
= remove_line ();
837 handle_line_error (p
, repetition
);
838 save_line_to_file (line
);
841 close_output_file ();
843 /* Ensure that the line number specified is not 1 greater than
844 the number of lines in the file. */
845 if (no_more_lines ())
846 handle_line_error (p
, repetition
);
850 regexp_error (struct control
*p
, int repetition
, boolean ignore
)
852 fprintf (stderr
, _("%s: `%s': match not found"),
853 program_name
, global_argv
[p
->argnum
]);
856 fprintf (stderr
, _(" on repetition %d\n"), repetition
);
858 fprintf (stderr
, "\n");
862 dump_rest_of_file ();
863 close_output_file ();
868 /* Read the input until a line matches the regexp in P, outputting
869 it unless P->IGNORE is TRUE.
870 REPETITION is this repeat-count; 0 means the first time. */
873 process_regexp (struct control
*p
, int repetition
)
875 struct cstring
*line
; /* From input file. */
876 unsigned int line_len
; /* To make "$" in regexps work. */
877 unsigned int break_line
; /* First line number of next file. */
878 boolean ignore
= p
->ignore
; /* If TRUE, skip this section. */
882 create_output_file ();
884 /* If there is no offset for the regular expression, or
885 it is positive, then it is not necessary to buffer the lines. */
891 line
= find_line (++current_line
);
894 if (p
->repeat_forever
)
898 dump_rest_of_file ();
899 close_output_file ();
904 regexp_error (p
, repetition
, ignore
);
906 line_len
= line
->len
;
907 if (line
->str
[line_len
- 1] == '\n')
909 ret
= re_search (&p
->re_compiled
, line
->str
, line_len
,
910 0, line_len
, (struct re_registers
*) 0);
913 error (0, 0, _("error in regular expression search"));
918 line
= remove_line ();
920 save_line_to_file (line
);
928 /* Buffer the lines. */
931 line
= find_line (++current_line
);
934 if (p
->repeat_forever
)
938 dump_rest_of_file ();
939 close_output_file ();
944 regexp_error (p
, repetition
, ignore
);
946 line_len
= line
->len
;
947 if (line
->str
[line_len
- 1] == '\n')
949 ret
= re_search (&p
->re_compiled
, line
->str
, line_len
,
950 0, line_len
, (struct re_registers
*) 0);
953 error (0, 0, _("error in regular expression search"));
961 /* Account for any offset from this regexp. */
962 break_line
= current_line
+ p
->offset
;
964 write_to_file (break_line
, ignore
, p
->argnum
);
967 close_output_file ();
969 current_line
= break_line
;
972 /* Split the input file according to the control records we have built. */
979 for (i
= 0; i
< control_used
; i
++)
981 if (controls
[i
].regexpr
)
983 for (j
= 0; (controls
[i
].repeat_forever
984 || j
<= controls
[i
].repeat
); j
++)
985 process_regexp (&controls
[i
], j
);
989 for (j
= 0; (controls
[i
].repeat_forever
990 || j
<= controls
[i
].repeat
); j
++)
991 process_line_count (&controls
[i
], j
);
995 create_output_file ();
996 dump_rest_of_file ();
997 close_output_file ();
1000 /* Return the name of output file number NUM. */
1003 make_filename (unsigned int num
)
1005 strcpy (filename_space
, prefix
);
1007 sprintf (filename_space
+strlen(prefix
), suffix
, num
);
1009 sprintf (filename_space
+strlen(prefix
), "%0*d", digits
, num
);
1010 return filename_space
;
1013 /* Create the next output file. */
1016 create_output_file (void)
1018 output_filename
= make_filename (files_created
);
1019 output_stream
= fopen (output_filename
, "w");
1020 if (output_stream
== NULL
)
1022 error (0, errno
, "%s", output_filename
);
1029 /* Delete all the files we have created. */
1032 delete_all_files (void)
1037 for (i
= 0; i
< files_created
; i
++)
1039 name
= make_filename (i
);
1041 error (0, errno
, "%s", name
);
1045 /* Close the current output file and print the count
1046 of characters in this file. */
1049 close_output_file (void)
1053 if (fclose (output_stream
) == EOF
)
1055 error (0, errno
, _("write error for `%s'"), output_filename
);
1056 output_stream
= NULL
;
1059 if (bytes_written
== 0 && elide_empty_files
)
1061 if (unlink (output_filename
))
1062 error (0, errno
, "%s", output_filename
);
1066 if (!suppress_count
)
1067 fprintf (stdout
, "%d\n", bytes_written
);
1068 output_stream
= NULL
;
1072 /* Save line LINE to the output file and
1073 increment the character count for the current file. */
1076 save_line_to_file (const struct cstring
*line
)
1078 fwrite (line
->str
, sizeof (char), line
->len
, output_stream
);
1079 bytes_written
+= line
->len
;
1082 /* Return a new, initialized control record. */
1084 static struct control
*
1085 new_control_record (void)
1087 static unsigned control_allocated
= 0; /* Total space allocated. */
1090 if (control_allocated
== 0)
1092 control_allocated
= ALLOC_SIZE
;
1093 controls
= (struct control
*)
1094 xmalloc (sizeof (struct control
) * control_allocated
);
1096 else if (control_used
== control_allocated
)
1098 control_allocated
+= ALLOC_SIZE
;
1099 controls
= (struct control
*)
1100 xrealloc ((char *) controls
,
1101 sizeof (struct control
) * control_allocated
);
1103 p
= &controls
[control_used
++];
1106 p
->repeat_forever
= 0;
1107 p
->lines_required
= 0;
1112 /* Check if there is a numeric offset after a regular expression.
1113 STR is the entire command line argument.
1114 P is the control record for this regular expression.
1115 NUM is the numeric part of STR. */
1118 check_for_offset (struct control
*p
, const char *str
, const char *num
)
1122 if (*num
!= '-' && *num
!= '+')
1123 error (1, 0, _("%s: `+' or `-' expected after delimeter"), str
);
1125 if (xstrtoul (num
+ 1, NULL
, 10, &val
, NULL
) != LONGINT_OK
1127 error (1, 0, _("%s: integer expected after `%c'"), str
, *num
);
1128 p
->offset
= (unsigned int) val
;
1131 p
->offset
= -p
->offset
;
1134 /* Given that the first character of command line arg STR is '{',
1135 make sure that the rest of the string is a valid repeat count
1136 and store its value in P.
1137 ARGNUM is the ARGV index of STR. */
1140 parse_repeat_count (int argnum
, struct control
*p
, char *str
)
1145 end
= str
+ strlen (str
) - 1;
1147 error (1, 0, _("%s: `}' is required in repeat count"), str
);
1150 if (str
+1 == end
-1 && *(str
+1) == '*')
1151 p
->repeat_forever
= 1;
1154 if (xstrtoul (str
+ 1, NULL
, 10, &val
, NULL
) != LONGINT_OK
1157 error (1, 0, _("%s}: integer required between `{' and `}'"),
1158 global_argv
[argnum
]);
1160 p
->repeat
= (unsigned int) val
;
1166 /* Extract the regular expression from STR and check for a numeric offset.
1167 STR should start with the regexp delimiter character.
1168 Return a new control record for the regular expression.
1169 ARGNUM is the ARGV index of STR.
1170 Unless IGNORE is TRUE, mark these lines for output. */
1172 static struct control
*
1173 extract_regexp (int argnum
, boolean ignore
, char *str
)
1175 int len
; /* Number of chars in this regexp. */
1177 char *closing_delim
;
1181 closing_delim
= strrchr (str
+ 1, delim
);
1182 if (closing_delim
== NULL
)
1183 error (1, 0, _("%s: closing delimeter `%c' missing"), str
, delim
);
1185 len
= closing_delim
- str
- 1;
1186 p
= new_control_record ();
1190 p
->regexpr
= (char *) xmalloc ((unsigned) (len
+ 1));
1191 strncpy (p
->regexpr
, str
+ 1, len
);
1192 p
->re_compiled
.allocated
= len
* 2;
1193 p
->re_compiled
.buffer
= (unsigned char *) xmalloc (p
->re_compiled
.allocated
);
1194 p
->re_compiled
.fastmap
= xmalloc (256);
1195 p
->re_compiled
.translate
= 0;
1197 p
->re_compiled
.syntax_parens
= 0;
1199 err
= re_compile_pattern (p
->regexpr
, len
, &p
->re_compiled
);
1202 error (0, 0, _("%s: invalid regular expression: %s"), str
, err
);
1206 if (closing_delim
[1])
1207 check_for_offset (p
, str
, closing_delim
+ 1);
1212 /* Extract the break patterns from args START through ARGC - 1 of ARGV.
1213 After each pattern, check if the next argument is a repeat count. */
1216 parse_patterns (int argc
, int start
, char **argv
)
1218 int i
; /* Index into ARGV. */
1219 struct control
*p
; /* New control record created. */
1222 for (i
= start
; i
< argc
; i
++)
1224 if (*argv
[i
] == '/' || *argv
[i
] == '%')
1226 p
= extract_regexp (i
, *argv
[i
] == '%', argv
[i
]);
1230 p
= new_control_record ();
1232 if (xstrtoul (argv
[i
], NULL
, 10, &val
, NULL
) != LONGINT_OK
1234 error (1, 0, _("%s: invalid pattern"), argv
[i
]);
1235 p
->lines_required
= (int) val
;
1238 if (i
+ 1 < argc
&& *argv
[i
+ 1] == '{')
1240 /* We have a repeat count. */
1242 parse_repeat_count (i
, p
, argv
[i
]);
1248 get_format_flags (char **format_ptr
)
1252 for (; **format_ptr
; (*format_ptr
)++)
1254 switch (**format_ptr
)
1265 count
+= 2; /* Allow for 0x prefix preceding an `x' conversion. */
1276 get_format_width (char **format_ptr
)
1282 start
= *format_ptr
;
1283 for (; **format_ptr
; (*format_ptr
)++)
1284 if (!ISDIGIT (**format_ptr
))
1287 ch_save
= **format_ptr
;
1288 **format_ptr
= '\0';
1289 /* In the case where no minimum field width is explicitly specified,
1290 allow for enough octal digits to represent the value of LONG_MAX. */
1291 count
= ((*format_ptr
== start
)
1292 ? bytes_to_octal_digits
[sizeof (long)]
1294 **format_ptr
= ch_save
;
1299 get_format_prec (char **format_ptr
)
1306 if (**format_ptr
!= '.')
1310 if (**format_ptr
== '-' || **format_ptr
== '+')
1312 is_negative
= (**format_ptr
== '-');
1320 start
= *format_ptr
;
1321 for (; **format_ptr
; (*format_ptr
)++)
1322 if (!ISDIGIT (**format_ptr
))
1325 /* ANSI 4.9.6.1 says that if the precision is negative, it's as good as
1326 not present. */
1328 start
= *format_ptr
;
1330 ch_save
= **format_ptr
;
1331 **format_ptr
= '\0';
1332 count
= (*format_ptr
== start
) ? 11 : atoi (start
);
1333 **format_ptr
= ch_save
;
1339 get_format_conv_type (char **format_ptr
)
1341 int ch
= *((*format_ptr
)++);
1354 error (1, 0, _("missing conversion specifier in suffix"));
1359 error (1, 0, _("invalid conversion specifier in suffix: %c"), ch
);
1361 error (1, 0, _("invalid conversion specifier in suffix: \\%.3o"), ch
);
1366 max_out (char *format
)
1368 unsigned out_count
= 0;
1369 unsigned percents
= 0;
1380 out_count
+= get_format_flags (&format
);
1382 int width
= get_format_width (&format
);
1383 int prec
= get_format_prec (&format
);
1385 out_count
+= MAX (width
, prec
);
1387 get_format_conv_type (&format
);
1392 error (1, 0, _("missing %% conversion specification in suffix"));
1393 else if (percents
> 1)
1394 error (1, 0, _("too many %% conversion specifications in suffix"));
1400 main (int argc
, char **argv
)
1405 struct sigaction oldact
, newact
;
1408 program_name
= argv
[0];
1409 setlocale (LC_ALL
, "");
1410 bindtextdomain (PACKAGE
, LOCALEDIR
);
1411 textdomain (PACKAGE
);
1416 suppress_count
= FALSE
;
1417 remove_files
= TRUE
;
1418 prefix
= DEFAULT_PREFIX
;
1421 newact
.sa_handler
= interrupt_handler
;
1422 sigemptyset (&newact
.sa_mask
);
1423 newact
.sa_flags
= 0;
1425 sigaction (SIGHUP
, NULL
, &oldact
);
1426 if (oldact
.sa_handler
!= SIG_IGN
)
1427 sigaction (SIGHUP
, &newact
, NULL
);
1429 sigaction (SIGINT
, NULL
, &oldact
);
1430 if (oldact
.sa_handler
!= SIG_IGN
)
1431 sigaction (SIGINT
, &newact
, NULL
);
1433 sigaction (SIGQUIT
, NULL
, &oldact
);
1434 if (oldact
.sa_handler
!= SIG_IGN
)
1435 sigaction (SIGQUIT
, &newact
, NULL
);
1437 sigaction (SIGTERM
, NULL
, &oldact
);
1438 if (oldact
.sa_handler
!= SIG_IGN
)
1439 sigaction (SIGTERM
, &newact
, NULL
);
1440 #else /* not SA_INTERRUPT */
1441 if (signal (SIGHUP
, SIG_IGN
) != SIG_IGN
)
1442 signal (SIGHUP
, interrupt_handler
);
1443 if (signal (SIGINT
, SIG_IGN
) != SIG_IGN
)
1444 signal (SIGINT
, interrupt_handler
);
1445 if (signal (SIGQUIT
, SIG_IGN
) != SIG_IGN
)
1446 signal (SIGQUIT
, interrupt_handler
);
1447 if (signal (SIGTERM
, SIG_IGN
) != SIG_IGN
)
1448 signal (SIGTERM
, interrupt_handler
);
1449 #endif /* not SA_INTERRUPT */
1451 while ((optc
= getopt_long (argc
, argv
, "f:b:kn:sqz", longopts
, (int *) 0))
1467 remove_files
= FALSE
;
1471 if (xstrtoul (optarg
, NULL
, 10, &val
, NULL
) != LONGINT_OK
1473 error (1, 0, _("%s: invalid number"), optarg
);
1479 suppress_count
= TRUE
;
1483 elide_empty_files
= TRUE
;
1492 printf ("csplit - %s\n", PACKAGE_VERSION
);
1499 if (argc
- optind
< 2)
1501 error (0, 0, _("too few arguments"));
1506 filename_space
= (char *) xmalloc (strlen (prefix
) + max_out (suffix
) + 2);
1508 filename_space
= (char *) xmalloc (strlen (prefix
) + digits
+ 2);
1510 set_input_file (argv
[optind
++]);
1512 parse_patterns (argc
, optind
, argv
);
1516 if (close (input_desc
) < 0)
1518 error (0, errno
, _("read error"));
1529 fprintf (stderr
, _("Try `%s --help' for more information.\n"),
1534 Usage: %s [OPTION]... FILE PATTERN...\n\
1538 Output pieces of FILE separated by PATTERN(s) to files `xx01', `xx02', ...,\n\
1539 and output byte counts of each piece to standard output.\n\
1541 -b, --suffix-format=FORMAT use sprintf FORMAT instead of %%d\n\
1542 -f, --prefix=PREFIX use PREFIX instead of `xx'\n\
1543 -k, --keep-files do not remove output files on errors\n\
1544 -n, --digits=DIGITS use specified number of digits instead of 2\n\
1545 -s, --quiet, --silent do not print counts of output file sizes\n\
1546 -z, --elide-empty-files remove empty output files\n\
1547 --help display this help and exit\n\
1548 --version output version information and exit\n\
1550 Read standard input if FILE is -. Each PATTERN may be:\n\
1552 INTEGER copy up to but not including specified line number\n\
1553 /REGEXP/[OFFSET] copy up to but not including a matching line\n\
1554 %%REGEXP%%[OFFSET] skip to, but not including a matching line\n\
1555 {INTEGER} repeat the previous pattern specified number of times\n\
1556 {*} repeat the previous pattern as many times as possible\n\
1558 A line OFFSET is a required `+' or `-' followed by a positive integer.\n\