1 /* csplit - split a file into sections determined by context lines
2 Copyright (C) 1991, 1995 Free Software Foundation, Inc.
4 This program is free software; you can redistribute it and/or modify
5 it under the terms of the GNU General Public License as published by
6 the Free Software Foundation; either version 2, or (at your option)
9 This program is distributed in the hope that it will be useful,
10 but WITHOUT ANY WARRANTY; without even the implied warranty of
11 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 GNU General Public License for more details.
14 You should have received a copy of the GNU General Public License
15 along with this program; if not, write to the Free Software
16 Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. */
18 /* Written by Stuart Kemp, cpsrk@groper.jcu.edu.au.
19 Modified by David MacKenzie, djm@gnu.ai.mit.edu. */
25 #include <sys/types.h>
29 #endif /* HAVE_LIMITS_H */
43 #define MAX(a,b) (((a) > (b)) ? (a) : (b))
48 static char *xrealloc ();
49 static char *xmalloc ();
50 static void cleanup ();
51 static void close_output_file ();
52 static void create_output_file ();
53 static void save_line_to_file ();
61 /* Increment size of area for control records. */
64 /* The default prefix for output file names. */
65 #define DEFAULT_PREFIX "xx"
69 /* A compiled pattern arg. */
72 char *regexpr
; /* Non-compiled regular expression. */
73 struct re_pattern_buffer re_compiled
; /* Compiled regular expression. */
74 int offset
; /* Offset from regexp to split at. */
75 int lines_required
; /* Number of lines required. */
76 unsigned int repeat
; /* Repeat count. */
77 int repeat_forever
; /* Non-zero if `*' used as a repeat count. */
78 int argnum
; /* ARGV index. */
79 boolean ignore
; /* If true, produce no output (for regexp). */
82 /* Initial size of data area in buffers. */
83 #define START_SIZE 8191
85 /* Increment size for data area. */
86 #define INCR_SIZE 2048
88 /* Number of lines kept in each node in line list. */
92 /* Some small values to test the algorithms. */
93 #define START_SIZE 200
98 /* A string with a length count. */
105 /* Pointers to the beginnings of lines in the buffer area.
106 These structures are linked together if needed. */
109 unsigned used
; /* Number of offsets used in this struct. */
110 unsigned insert_index
; /* Next offset to use when inserting line. */
111 unsigned retrieve_index
; /* Next index to use when retrieving line. */
112 struct cstring starts
[CTRL_SIZE
]; /* Lines in the data area. */
113 struct line
*next
; /* Next in linked list. */
116 /* The structure to hold the input lines.
117 Contains a pointer to the data area and a list containing
118 pointers to the individual lines. */
121 unsigned bytes_alloc
; /* Size of the buffer area. */
122 unsigned bytes_used
; /* Bytes used in the buffer area. */
123 unsigned start_line
; /* First line number in this buffer. */
124 unsigned first_available
; /* First line that can be retrieved. */
125 unsigned num_lines
; /* Number of complete lines in this buffer. */
126 char *buffer
; /* Data area. */
127 struct line
*line_start
; /* Head of list of pointers to lines. */
128 struct line
*curr_line
; /* The line start record currently in use. */
129 struct buffer_record
*next
;
132 /* The name this program was run with. */
135 /* Convert the number of 8-bit bytes of a binary representation to
136 the number of characters required to represent the same quantity
137 as an unsigned octal. For example, a 32-bit (4-byte) quantity may
138 require a field width as wide as 11 characters. */
139 static const unsigned int bytes_to_octal_digits
[] =
140 {0, 3, 6, 8, 11, 14, 16, 19, 22, 25, 27, 30, 32, 35, 38, 41, 43};
142 /* Input file descriptor. */
143 static int input_desc
= 0;
145 /* List of available buffers. */
146 static struct buffer_record
*free_list
= NULL
;
148 /* Start of buffer list. */
149 static struct buffer_record
*head
= NULL
;
151 /* Partially read line. */
152 static char *hold_area
= NULL
;
154 /* Number of chars in `hold_area'. */
155 static unsigned hold_count
= 0;
157 /* Number of the last line in the buffers. */
158 static unsigned last_line_number
= 0;
160 /* Number of the line currently being examined. */
161 static unsigned current_line
= 0;
163 /* If TRUE, we have read EOF. */
164 static boolean have_read_eof
= FALSE
;
166 /* Name of output files. */
167 static char *filename_space
= NULL
;
169 /* Prefix part of output file names. */
170 static char *prefix
= NULL
;
172 /* Suffix part of output file names. */
173 static char *suffix
= NULL
;
175 /* Number of digits to use in output file names. */
176 static int digits
= 2;
178 /* Number of files created so far. */
179 static unsigned int files_created
= 0;
181 /* Number of bytes written to current file. */
182 static unsigned int bytes_written
;
184 /* Output file pointer. */
185 static FILE *output_stream
= NULL
;
187 /* Output file name. */
188 static char *output_filename
= NULL
;
190 /* Perhaps it would be cleaner to pass arg values instead of indexes. */
191 static char **global_argv
;
193 /* If TRUE, do not print the count of bytes in each output file. */
194 static boolean suppress_count
;
196 /* If TRUE, remove output files on error. */
197 static boolean remove_files
;
199 /* If TRUE, remove all output files which have a zero length. */
200 static boolean elide_empty_files
;
202 /* The compiled pattern arguments, which determine how to split the input file. */
204 static struct control
*controls
;
206 /* Number of elements in `controls'. */
207 static unsigned int control_used
;
209 /* If non-zero, display usage information and exit. */
210 static int show_help
;
212 /* If non-zero, print the version on standard output then exit. */
213 static int show_version
;
215 static struct option
const longopts
[] =
217 {"digits", required_argument
, NULL
, 'n'},
218 {"quiet", no_argument
, NULL
, 'q'},
219 {"silent", no_argument
, NULL
, 's'},
220 {"keep-files", no_argument
, NULL
, 'k'},
221 {"elide-empty-files", no_argument
, NULL
, 'z'},
222 {"prefix", required_argument
, NULL
, 'f'},
223 {"suffix-format", required_argument
, NULL
, 'b'},
224 {"help", no_argument
, &show_help
, 1},
225 {"version", no_argument
, &show_version
, 1},
229 /* Allocate N bytes of memory dynamically, with error checking. */
240 error (0, 0, "virtual memory exhausted");
246 /* Change the size of an allocated block of memory P to N bytes, with error checking.
248 If P is NULL, run xmalloc.
249 If N is 0, run free and return NULL. */
266 error (0, 0, "virtual memory exhausted");
272 /* Keep track of NUM chars of a partial line in buffer START.
273 These chars will be retrieved later when another large buffer is read.
274 It is not necessary to create a new buffer for these chars; instead,
275 we keep a pointer to the existing buffer. This buffer *is* on the
276 free list, and when the next buffer is obtained from this list
277 (even if it is this one), these chars will be placed at the
278 start of the new buffer. */
281 save_to_hold_area (start
, num
)
289 /* Read up to MAX_N_BYTES chars from the input stream into DEST.
290 Return the number of chars read. */
293 read_input (dest
, max_n_bytes
)
295 unsigned int max_n_bytes
;
299 if (max_n_bytes
== 0)
302 bytes_read
= safe_read (input_desc
, dest
, max_n_bytes
);
305 have_read_eof
= TRUE
;
309 error (0, errno
, "read error");
316 /* Initialize existing line record P. */
319 clear_line_control (p
)
324 p
->retrieve_index
= 0;
327 /* Initialize all line records in B. */
330 clear_all_line_control (b
)
331 struct buffer_record
*b
;
335 for (l
= b
->line_start
; l
; l
= l
->next
)
336 clear_line_control (l
);
339 /* Return a new, initialized line record. */
346 p
= (struct line
*) xmalloc (sizeof (struct line
));
349 clear_line_control (p
);
354 /* Record LINE_START, which is the address of the start of a line
355 of length LINE_LEN in the large buffer, in the lines buffer of B. */
358 keep_new_line (b
, line_start
, line_len
)
359 struct buffer_record
*b
;
365 /* If there is no existing area to keep line info, get some. */
366 if (b
->line_start
== NULL
)
367 b
->line_start
= b
->curr_line
= new_line_control ();
369 /* If existing area for lines is full, get more. */
370 if (b
->curr_line
->used
== CTRL_SIZE
)
372 b
->curr_line
->next
= new_line_control ();
373 b
->curr_line
= b
->curr_line
->next
;
378 /* Record the start of the line, and update counters. */
379 l
->starts
[l
->insert_index
].str
= line_start
;
380 l
->starts
[l
->insert_index
].len
= line_len
;
385 /* Scan the buffer in B for newline characters
386 and record the line start locations and lengths in B.
387 Return the number of lines found in this buffer.
389 There may be an incomplete line at the end of the buffer;
390 a pointer is kept to this area, which will be used when
391 the next buffer is filled. */
394 record_line_starts (b
)
395 struct buffer_record
*b
;
397 char *line_start
; /* Start of current line. */
398 char *line_end
; /* End of each line found. */
399 unsigned int bytes_left
; /* Length of incomplete last line. */
400 unsigned int lines
; /* Number of lines found. */
401 unsigned int line_length
; /* Length of each line found. */
403 if (b
->bytes_used
== 0)
407 line_start
= b
->buffer
;
408 bytes_left
= b
->bytes_used
;
412 line_end
= memchr (line_start
, '\n', bytes_left
);
413 if (line_end
== NULL
)
415 line_length
= line_end
- line_start
+ 1;
416 keep_new_line (b
, line_start
, line_length
);
417 bytes_left
-= line_length
;
418 line_start
= line_end
+ 1;
422 /* Check for an incomplete last line. */
427 keep_new_line (b
, line_start
, bytes_left
);
431 save_to_hold_area (line_start
, bytes_left
);
434 b
->num_lines
= lines
;
435 b
->first_available
= b
->start_line
= last_line_number
+ 1;
436 last_line_number
+= lines
;
441 /* Return a new buffer with room to store SIZE bytes, plus
442 an extra byte for safety. */
444 static struct buffer_record
*
445 create_new_buffer (size
)
448 struct buffer_record
*new_buffer
;
450 new_buffer
= (struct buffer_record
*)
451 xmalloc (sizeof (struct buffer_record
));
453 new_buffer
->buffer
= (char *) xmalloc (size
+ 1);
455 new_buffer
->bytes_alloc
= size
;
456 new_buffer
->line_start
= new_buffer
->curr_line
= NULL
;
461 /* Return a new buffer of at least MIN_SIZE bytes. If a buffer of at
462 least that size is currently free, use it, otherwise create a new one. */
464 static struct buffer_record
*
465 get_new_buffer (min_size
)
466 unsigned int min_size
;
468 struct buffer_record
*p
, *q
;
469 struct buffer_record
*new_buffer
; /* Buffer to return. */
470 unsigned int alloc_size
; /* Actual size that will be requested. */
472 alloc_size
= START_SIZE
;
473 while (min_size
> alloc_size
)
474 alloc_size
+= INCR_SIZE
;
476 if (free_list
== NULL
)
477 new_buffer
= create_new_buffer (alloc_size
);
480 /* Use first-fit to find a buffer. */
481 p
= new_buffer
= NULL
;
486 if (q
->bytes_alloc
>= min_size
)
499 new_buffer
= (q
? q
: create_new_buffer (alloc_size
));
501 new_buffer
->curr_line
= new_buffer
->line_start
;
502 clear_all_line_control (new_buffer
);
505 new_buffer
->num_lines
= 0;
506 new_buffer
->bytes_used
= 0;
507 new_buffer
->start_line
= new_buffer
->first_available
= last_line_number
+ 1;
508 new_buffer
->next
= NULL
;
513 /* Add buffer BUF to the list of free buffers. */
517 struct buffer_record
*buf
;
519 buf
->next
= free_list
;
523 /* Append buffer BUF to the linked list of buffers that contain
524 some data yet to be processed. */
528 struct buffer_record
*buf
;
530 struct buffer_record
*p
;
533 buf
->curr_line
= buf
->line_start
;
539 for (p
= head
; p
->next
; p
= p
->next
)
545 /* Fill a buffer of input.
547 Set the initial size of the buffer to a default.
548 Fill the buffer (from the hold area and input stream)
549 and find the individual lines.
550 If no lines are found (the buffer is too small to hold the next line),
551 release the current buffer (whose contents would have been put in the
552 hold area) and repeat the process with another large buffer until at least
553 one entire line has been read.
555 Return TRUE if a new buffer was obtained, otherwise FALSE
556 (in which case end-of-file must have been encountered). */
561 struct buffer_record
*b
;
562 unsigned int bytes_wanted
= START_SIZE
; /* Minimum buffer size. */
563 unsigned int bytes_avail
; /* Size of new buffer created. */
564 unsigned int lines_found
; /* Number of lines in this new buffer. */
565 char *p
; /* Place to load into buffer. */
570 /* We must make the buffer at least as large as the amount of data
571 in the partial line left over from the last call. */
572 if (bytes_wanted
< hold_count
)
573 bytes_wanted
= hold_count
;
577 b
= get_new_buffer (bytes_wanted
);
578 bytes_avail
= b
->bytes_alloc
; /* Size of buffer returned. */
581 /* First check the `holding' area for a partial line. */
585 memcpy (p
, hold_area
, hold_count
);
587 b
->bytes_used
+= hold_count
;
588 bytes_avail
-= hold_count
;
592 b
->bytes_used
+= (unsigned int) read_input (p
, bytes_avail
);
594 lines_found
= record_line_starts (b
);
595 bytes_wanted
= b
->bytes_alloc
* 2;
599 while (!lines_found
&& !have_read_eof
);
604 return lines_found
!= 0;
607 /* Return the line number of the first line that has not yet been retrieved. */
610 get_first_line_in_buffer ()
612 if (head
== NULL
&& !load_buffer ())
613 error (1, errno
, "input disappeared");
615 return head
->first_available
;
618 /* Return a pointer to the logical first line in the buffer and make the
619 next line the logical first line.
620 Return NULL if there is no more input. */
622 static struct cstring
*
625 struct cstring
*line
; /* Return value. */
626 struct line
*l
; /* For convenience. */
628 if (head
== NULL
&& !load_buffer ())
631 if (current_line
< head
->first_available
)
632 current_line
= head
->first_available
;
634 ++(head
->first_available
);
638 line
= &l
->starts
[l
->retrieve_index
];
640 /* Advance index to next line. */
641 if (++l
->retrieve_index
== l
->used
)
643 /* Go on to the next line record. */
644 head
->curr_line
= l
->next
;
645 if (head
->curr_line
== NULL
|| head
->curr_line
->used
== 0)
647 /* Go on to the next data block. */
648 struct buffer_record
*b
= head
;
657 /* Search the buffers for line LINENUM, reading more input if necessary.
658 Return a pointer to the line, or NULL if it is not found in the file. */
660 static struct cstring
*
662 unsigned int linenum
;
664 struct buffer_record
*b
;
666 if (head
== NULL
&& !load_buffer ())
669 if (linenum
< head
->start_line
)
674 if (linenum
< b
->start_line
+ b
->num_lines
)
676 /* The line is in this buffer. */
678 unsigned int offset
; /* How far into the buffer the line is. */
681 offset
= linenum
- b
->start_line
;
682 /* Find the control record. */
683 while (offset
>= CTRL_SIZE
)
688 return &l
->starts
[offset
];
690 if (b
->next
== NULL
&& !load_buffer ())
692 b
= b
->next
; /* Try the next data block. */
696 /* Return TRUE if there are no more lines available for input. */
701 return (find_line (current_line
+ 1) == NULL
) ? TRUE
: FALSE
;
704 /* Set the name of the input file to NAME and open it. */
707 set_input_file (name
)
710 if (!strcmp (name
, "-"))
714 input_desc
= open (name
, O_RDONLY
);
716 error (1, errno
, "%s", name
);
720 /* Write all lines from the beginning of the buffer up to, but
721 not including, line LAST_LINE, to the current output file.
722 If IGNORE is TRUE, do not output lines selected here.
723 ARGNUM is the index in ARGV of the current pattern. */
726 write_to_file (last_line
, ignore
, argnum
)
727 unsigned int last_line
;
731 struct cstring
*line
;
732 unsigned int first_line
; /* First available input line. */
733 unsigned int lines
; /* Number of lines to output. */
736 first_line
= get_first_line_in_buffer ();
738 if (first_line
> last_line
)
740 error (0, 0, "%s: line number out of range", global_argv
[argnum
]);
744 lines
= last_line
- first_line
;
746 for (i
= 0; i
< lines
; i
++)
748 line
= remove_line ();
751 error (0, 0, "%s: line number out of range", global_argv
[argnum
]);
755 save_line_to_file (line
);
759 /* Output any lines left after all regexps have been processed. */
764 struct cstring
*line
;
766 while ((line
= remove_line ()) != NULL
)
767 save_line_to_file (line
);
770 /* Handle an attempt to read beyond EOF under the control of record P,
771 on iteration REPETITION if nonzero. */
774 handle_line_error (p
, repetition
)
778 fprintf (stderr
, "%s: `%d': line number out of range",
779 program_name
, p
->lines_required
);
781 fprintf (stderr
, " on repetition %d\n", repetition
);
783 fprintf (stderr
, "\n");
788 /* Determine the line number that marks the end of this file,
789 then get those lines and save them to the output file.
790 P is the control record.
791 REPETITION is the repetition number. */
794 process_line_count (p
, repetition
)
798 unsigned int linenum
;
799 unsigned int last_line_to_save
= p
->lines_required
* (repetition
+ 1);
800 struct cstring
*line
;
802 create_output_file ();
804 linenum
= get_first_line_in_buffer ();
806 /* Check for requesting a line that has already been written out.
807 If this ever happens, it's due to a bug in csplit. */
808 if (linenum
>= last_line_to_save
)
811 while (linenum
++ < last_line_to_save
)
813 line
= remove_line ();
815 handle_line_error (p
, repetition
);
816 save_line_to_file (line
);
819 close_output_file ();
821 /* Ensure that the line number specified is not 1 greater than
822 the number of lines in the file. */
823 if (no_more_lines ())
824 handle_line_error (p
, repetition
);
828 regexp_error (p
, repetition
, ignore
)
833 fprintf (stderr
, "%s: `%s': match not found",
834 program_name
, global_argv
[p
->argnum
]);
837 fprintf (stderr
, " on repetition %d\n", repetition
);
839 fprintf (stderr
, "\n");
843 dump_rest_of_file ();
844 close_output_file ();
849 /* Read the input until a line matches the regexp in P, outputting
850 it unless P->IGNORE is TRUE.
851 REPETITION is this repeat-count; 0 means the first time. */
854 process_regexp (p
, repetition
)
858 struct cstring
*line
; /* From input file. */
859 unsigned int line_len
; /* To make "$" in regexps work. */
860 unsigned int break_line
; /* First line number of next file. */
861 boolean ignore
= p
->ignore
; /* If TRUE, skip this section. */
865 create_output_file ();
867 /* If there is no offset for the regular expression, or
868 it is positive, then it is not necessary to buffer the lines. */
874 line
= find_line (++current_line
);
877 if (p
->repeat_forever
)
881 dump_rest_of_file ();
882 close_output_file ();
887 regexp_error (p
, repetition
, ignore
);
889 line_len
= line
->len
;
890 if (line
->str
[line_len
- 1] == '\n')
892 ret
= re_search (&p
->re_compiled
, line
->str
, line_len
,
893 0, line_len
, (struct re_registers
*) 0);
896 error (0, 0, "error in regular expression search");
901 line
= remove_line ();
903 save_line_to_file (line
);
911 /* Buffer the lines. */
914 line
= find_line (++current_line
);
917 if (p
->repeat_forever
)
921 dump_rest_of_file ();
922 close_output_file ();
927 regexp_error (p
, repetition
, ignore
);
929 line_len
= line
->len
;
930 if (line
->str
[line_len
- 1] == '\n')
932 ret
= re_search (&p
->re_compiled
, line
->str
, line_len
,
933 0, line_len
, (struct re_registers
*) 0);
936 error (0, 0, "error in regular expression search");
944 /* Account for any offset from this regexp. */
945 break_line
= current_line
+ p
->offset
;
947 write_to_file (break_line
, ignore
, p
->argnum
);
950 close_output_file ();
952 current_line
= break_line
;
955 /* Split the input file according to the control records we have built. */
962 for (i
= 0; i
< control_used
; i
++)
964 if (controls
[i
].regexpr
)
966 for (j
= 0; (controls
[i
].repeat_forever
967 || j
<= controls
[i
].repeat
); j
++)
968 process_regexp (&controls
[i
], j
);
972 for (j
= 0; (controls
[i
].repeat_forever
973 || j
<= controls
[i
].repeat
); j
++)
974 process_line_count (&controls
[i
], j
);
978 create_output_file ();
979 dump_rest_of_file ();
980 close_output_file ();
983 /* Return the name of output file number NUM. */
989 strcpy (filename_space
, prefix
);
991 sprintf (filename_space
+strlen(prefix
), suffix
, num
);
993 sprintf (filename_space
+strlen(prefix
), "%0*d", digits
, num
);
994 return filename_space
;
997 /* Create the next output file. */
1000 create_output_file ()
1002 output_filename
= make_filename (files_created
);
1003 output_stream
= fopen (output_filename
, "w");
1004 if (output_stream
== NULL
)
1006 error (0, errno
, "%s", output_filename
);
1013 /* Delete all the files we have created. */
1021 for (i
= 0; i
< files_created
; i
++)
1023 name
= make_filename (i
);
1025 error (0, errno
, "%s", name
);
1029 /* Close the current output file and print the count
1030 of characters in this file. */
1033 close_output_file ()
1037 if (fclose (output_stream
) == EOF
)
1039 error (0, errno
, "write error for `%s'", output_filename
);
1042 if (bytes_written
== 0 && elide_empty_files
)
1044 if (unlink (output_filename
))
1045 error (0, errno
, "%s", output_filename
);
1049 if (!suppress_count
)
1050 fprintf (stdout
, "%d\n", bytes_written
);
1051 output_stream
= NULL
;
1055 /* Optionally remove files created so far; then exit.
1056 Called when an error detected. */
1062 close_output_file ();
1065 delete_all_files ();
1070 /* Save line LINE to the output file and
1071 increment the character count for the current file. */
1074 save_line_to_file (line
)
1075 struct cstring
*line
;
1077 fwrite (line
->str
, sizeof (char), line
->len
, output_stream
);
1078 bytes_written
+= line
->len
;
1081 /* Return a new, initialized control record. */
1083 static struct control
*
1084 new_control_record ()
1086 static unsigned control_allocated
= 0; /* Total space allocated. */
1089 if (control_allocated
== 0)
1091 control_allocated
= ALLOC_SIZE
;
1092 controls
= (struct control
*)
1093 xmalloc (sizeof (struct control
) * control_allocated
);
1095 else if (control_used
== control_allocated
)
1097 control_allocated
+= ALLOC_SIZE
;
1098 controls
= (struct control
*)
1099 xrealloc (controls
, sizeof (struct control
) * control_allocated
);
1101 p
= &controls
[control_used
++];
1104 p
->repeat_forever
= 0;
1105 p
->lines_required
= 0;
1110 /* Convert string NUM to an integer and put the value in *RESULT.
1111 Return TRUE if the string consists entirely of digits, FALSE if not. */
1113 /* FIXME: use xstrtoul in place of this function. */
1116 string_to_number (result
, num
)
1126 while ((ch
= *num
++))
1130 val
= val
* 10 + ch
- '0';
1137 /* Check if there is a numeric offset after a regular expression.
1138 STR is the entire command line argument.
1139 P is the control record for this regular expression.
1140 NUM is the numeric part of STR. */
1143 check_for_offset (p
, str
, num
)
1148 if (*num
!= '-' && *num
!= '+')
1149 error (1, 0, "%s: `+' or `-' expected after delimeter", str
);
1151 if (!string_to_number (&p
->offset
, num
+ 1))
1152 error (1, 0, "%s: integer expected after `%c'", str
, *num
);
1155 p
->offset
= -p
->offset
;
1158 /* Given that the first character of command line arg STR is '{',
1159 make sure that the rest of the string is a valid repeat count
1160 and store its value in P.
1161 ARGNUM is the ARGV index of STR. */
1164 parse_repeat_count (argnum
, p
, str
)
1171 end
= str
+ strlen (str
) - 1;
1173 error (1, 0, "%s: `}' is required in repeat count", str
);
1176 if (str
+1 == end
-1 && *(str
+1) == '*')
1177 p
->repeat_forever
= 1;
1179 if (!string_to_number (&p
->repeat
, str
+ 1))
1180 error (1, 0, "%s}: integer required between `{' and `}'",
1181 global_argv
[argnum
]);
1186 /* Extract the regular expression from STR and check for a numeric offset.
1187 STR should start with the regexp delimiter character.
1188 Return a new control record for the regular expression.
1189 ARGNUM is the ARGV index of STR.
1190 Unless IGNORE is TRUE, mark these lines for output. */
1192 static struct control
*
1193 extract_regexp (argnum
, ignore
, str
)
1198 int len
; /* Number of chars in this regexp. */
1200 char *closing_delim
;
1204 closing_delim
= strrchr (str
+ 1, delim
);
1205 if (closing_delim
== NULL
)
1206 error (1, 0, "%s: closing delimeter `%c' missing", str
, delim
);
1208 len
= closing_delim
- str
- 1;
1209 p
= new_control_record ();
1213 p
->regexpr
= (char *) xmalloc ((unsigned) (len
+ 1));
1214 strncpy (p
->regexpr
, str
+ 1, len
);
1215 p
->re_compiled
.allocated
= len
* 2;
1216 p
->re_compiled
.buffer
= (unsigned char *) xmalloc (p
->re_compiled
.allocated
);
1217 p
->re_compiled
.fastmap
= xmalloc (256);
1218 p
->re_compiled
.translate
= 0;
1219 err
= re_compile_pattern (p
->regexpr
, len
, &p
->re_compiled
);
1222 error (0, 0, "%s: invalid regular expression: %s", str
, err
);
1226 if (closing_delim
[1])
1227 check_for_offset (p
, str
, closing_delim
+ 1);
1232 /* Extract the break patterns from args START through ARGC - 1 of ARGV.
1233 After each pattern, check if the next argument is a repeat count. */
1236 parse_patterns (argc
, start
, argv
)
1241 int i
; /* Index into ARGV. */
1242 struct control
*p
; /* New control record created. */
1244 for (i
= start
; i
< argc
; i
++)
1246 if (*argv
[i
] == '/' || *argv
[i
] == '%')
1248 p
= extract_regexp (i
, *argv
[i
] == '%', argv
[i
]);
1252 p
= new_control_record ();
1254 if (!string_to_number (&p
->lines_required
, argv
[i
]))
1255 error (1, 0, "%s: invalid pattern", argv
[i
]);
1258 if (i
+ 1 < argc
&& *argv
[i
+ 1] == '{')
1260 /* We have a repeat count. */
1262 parse_repeat_count (i
, p
, argv
[i
]);
1268 get_format_flags (format_ptr
)
1273 for (; **format_ptr
; (*format_ptr
)++)
1275 switch (**format_ptr
)
1286 count
+= 2; /* Allow for 0x prefix preceding an `x' conversion. */
1297 get_format_width (format_ptr
)
1304 start
= *format_ptr
;
1305 for (; **format_ptr
; (*format_ptr
)++)
1306 if (!ISDIGIT (**format_ptr
))
1309 ch_save
= **format_ptr
;
1310 **format_ptr
= '\0';
1311 /* In the case where no minimum field width is explicitly specified,
1312 allow for enough octal digits to represent the value of LONG_MAX. */
1313 count
= ((*format_ptr
== start
)
1314 ? bytes_to_octal_digits
[sizeof (long)]
1316 **format_ptr
= ch_save
;
1321 get_format_prec (format_ptr
)
1329 if (**format_ptr
!= '.')
1333 if (**format_ptr
== '-' || **format_ptr
== '+')
1335 is_negative
= (**format_ptr
== '-');
1343 start
= *format_ptr
;
1344 for (; **format_ptr
; (*format_ptr
)++)
1345 if (!ISDIGIT (**format_ptr
))
1348 /* ANSI 4.9.6.1 says that if the precision is negative, it's as good as not present. */
1351 start
= *format_ptr
;
1353 ch_save
= **format_ptr
;
1354 **format_ptr
= '\0';
1355 count
= (*format_ptr
== start
) ? 11 : atoi (start
);
1356 **format_ptr
= ch_save
;
1362 get_format_conv_type (format_ptr
)
1365 int ch
= *((*format_ptr
)++);
1378 error (1, 0, "missing conversion specifier in suffix");
1383 error (1, 0, "invalid conversion specifier in suffix: %c", ch
);
1385 error (1, 0, "invalid conversion specifier in suffix: \\%.3o", ch
);
1393 unsigned out_count
= 0;
1394 unsigned percents
= 0;
1405 out_count
+= get_format_flags (&format
);
1407 int width
= get_format_width (&format
);
1408 int prec
= get_format_prec (&format
);
1410 out_count
+= MAX (width
, prec
);
1412 get_format_conv_type (&format
);
1417 error (1, 0, "missing %% conversion specification in suffix");
1418 else if (percents
> 1)
1419 error (1, 0, "too many %% conversion specifications in suffix");
1425 interrupt_handler (signum
)
1428 error (0, 0, "interrupted");
1438 #ifdef _POSIX_VERSION
1439 struct sigaction oldact
, newact
;
1440 #endif /* _POSIX_VERSION */
1442 program_name
= argv
[0];
1446 suppress_count
= FALSE
;
1447 remove_files
= TRUE
;
1448 prefix
= DEFAULT_PREFIX
;
1450 #ifdef _POSIX_VERSION
1451 newact
.sa_handler
= interrupt_handler
;
1452 sigemptyset (&newact
.sa_mask
);
1453 newact
.sa_flags
= 0;
1455 sigaction (SIGHUP
, NULL
, &oldact
);
1456 if (oldact
.sa_handler
!= SIG_IGN
)
1457 sigaction (SIGHUP
, &newact
, NULL
);
1459 sigaction (SIGINT
, NULL
, &oldact
);
1460 if (oldact
.sa_handler
!= SIG_IGN
)
1461 sigaction (SIGINT
, &newact
, NULL
);
1463 sigaction (SIGQUIT
, NULL
, &oldact
);
1464 if (oldact
.sa_handler
!= SIG_IGN
)
1465 sigaction (SIGQUIT
, &newact
, NULL
);
1467 sigaction (SIGTERM
, NULL
, &oldact
);
1468 if (oldact
.sa_handler
!= SIG_IGN
)
1469 sigaction (SIGTERM
, &newact
, NULL
);
1470 #else /* !_POSIX_VERSION */
1471 if (signal (SIGHUP
, SIG_IGN
) != SIG_IGN
)
1472 signal (SIGHUP
, interrupt_handler
);
1473 if (signal (SIGINT
, SIG_IGN
) != SIG_IGN
)
1474 signal (SIGINT
, interrupt_handler
);
1475 if (signal (SIGQUIT
, SIG_IGN
) != SIG_IGN
)
1476 signal (SIGQUIT
, interrupt_handler
);
1477 if (signal (SIGTERM
, SIG_IGN
) != SIG_IGN
)
1478 signal (SIGTERM
, interrupt_handler
);
1481 while ((optc
= getopt_long (argc
, argv
, "f:b:kn:sqz", longopts
, (int *) 0))
1497 remove_files
= FALSE
;
1501 if (!string_to_number (&digits
, optarg
))
1502 error (1, 0, "%s: invalid number", optarg
);
1507 suppress_count
= TRUE
;
1511 elide_empty_files
= TRUE
;
1520 printf ("csplit - %s\n", version_string
);
1527 if (optind
>= argc
- 1)
1531 filename_space
= (char *) xmalloc (strlen (prefix
) + max_out (suffix
) + 2);
1533 filename_space
= (char *) xmalloc (strlen (prefix
) + digits
+ 2);
1535 set_input_file (argv
[optind
++]);
1537 parse_patterns (argc
, optind
, argv
);
1541 if (close (input_desc
) < 0)
1543 error (0, errno
, "read error");
1555 fprintf (stderr
, "Try `%s --help' for more information.\n",
1560 Usage: %s [OPTION]... FILE PATTERN...\n\
1564 Output pieces of FILE separated by PATTERN(s) to files `xx01', `xx02', ...,\n\
1565 and output byte counts of each piece to standard output.\n\
1567 -b, --suffix-format=FORMAT use sprintf FORMAT instead of %%d\n\
1568 -f, --prefix=PREFIX use PREFIX instead of `xx'\n\
1569 -k, --keep-files do not remove output files on errors\n\
1570 -n, --digits=DIGITS use specified number of digits instead of 2\n\
1571 -s, --quiet, --silent do not print counts of output file sizes\n\
1572 -z, --elide-empty-files remove empty output files\n\
1573 --help display this help and exit\n\
1574 --version output version information and exit\n\
1576 Read standard input if FILE is -. Each PATTERN may be:\n\
1578 INTEGER copy up to but not including specified line number\n\
1579 /REGEXP/[OFFSET] copy up to but not including a matching line\n\
1580 %%REGEXP%%[OFFSET] skip to, but not including a matching line\n\
1581 {INTEGER} repeat the previous pattern specified number of times\n\
1582 {*} repeat the previous pattern as many times as possible\n\
1584 A line OFFSET is a required `+' or `-' followed by a positive integer.\n\