1 /* split.c -- split a file into pieces.
2 Copyright (C) 88, 91, 1995-1999 Free Software Foundation, Inc.
4 This program is free software; you can redistribute it and/or modify
5 it under the terms of the GNU General Public License as published by
6 the Free Software Foundation; either version 2, or (at your option)
9 This program is distributed in the hope that it will be useful,
10 but WITHOUT ANY WARRANTY; without even the implied warranty of
11 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 GNU General Public License for more details.
14 You should have received a copy of the GNU General Public License
15 along with this program; if not, write to the Free Software Foundation,
16 Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. */
18 /* By tege@sics.se, with rms.
21 * Implement -t CHAR or -t REGEX to specify break characters other
28 #include <sys/types.h>
32 #include "safe-read.h"
35 /* The official name of this program (e.g., no `g' prefix). */
36 #define PROGRAM_NAME "split"
38 #define AUTHORS "Torbjorn Granlund and Richard M. Stallman"
42 /* The name this program was run with. */
45 /* Base name of output files. */
48 /* Pointer to the end of the prefix in OUTFILE.
49 Suffixes are inserted here. */
50 static char *outfile_mid
;
52 /* Pointer to the end of OUTFILE. */
53 static char *outfile_end
;
55 /* Name of input file. May be "-". */
58 /* Descriptor on which input file is open. */
59 static int input_desc
;
61 /* Descriptor on which output file is open. */
62 static int output_desc
;
64 /* If nonzero, print a diagnostic on standard error just before each
65 output file is opened. */
68 static struct option
const longopts
[] =
70 {"bytes", required_argument
, NULL
, 'b'},
71 {"lines", required_argument
, NULL
, 'l'},
72 {"line-bytes", required_argument
, NULL
, 'C'},
73 {"verbose", no_argument
, NULL
, 2},
74 {GETOPT_HELP_OPTION_DECL
},
75 {GETOPT_VERSION_OPTION_DECL
},
83 fprintf (stderr
, _("Try `%s --help' for more information.\n"),
88 Usage: %s [OPTION] [INPUT [PREFIX]]\n\
92 Output fixed-size pieces of INPUT to PREFIXaa, PREFIXab, ...; default\n\
93 PREFIX is `x'. With no INPUT, or when INPUT is -, read standard input.\n\
95 -b, --bytes=SIZE put SIZE bytes per output file\n\
96 -C, --line-bytes=SIZE put at most SIZE bytes of lines per output file\n\
97 -l, --lines=NUMBER put NUMBER lines per output file\n\
98 -NUMBER same as -l NUMBER\n\
99 --verbose print a diagnostic to standard error just\n\
100 before each output file is opened\n\
101 --help display this help and exit\n\
102 --version output version information and exit\n\
104 SIZE may have a multiplier suffix: b for 512, k for 1K, m for 1 Meg.\n\
106 puts (_("\nReport bugs to <bug-textutils@gnu.org>."));
108 exit (status
== 0 ? EXIT_SUCCESS
: EXIT_FAILURE
);
111 /* Compute the next sequential output file name suffix and store it
112 into the string `outfile' at the position pointed to by `outfile_mid'. */
115 next_file_name (void)
117 static unsigned n_digits
= 2;
120 /* Change any suffix of `z's to `a's. */
121 for (p
= outfile_end
- 1; *p
== 'z'; p
--)
126 /* Increment the rightmost non-`z' character that was present before the
127 above z/a substitutions. There is guaranteed to be such a character. */
130 /* If the result of that increment operation yielded a `z' and there
131 are only `z's to the left of it, then append two more `a' characters
132 to the end and add 1 (-1 + 2) to the number of digits (we're taking
133 out this `z' and adding two `a's). */
134 if (*p
== 'z' && p
== outfile_mid
)
138 *outfile_end
++ = 'a';
139 *outfile_end
++ = 'a';
143 /* Write BYTES bytes at BP to an output file.
144 If NEW_FILE_FLAG is nonzero, open the next output file.
145 Otherwise add to the same output file already in use. */
148 cwrite (int new_file_flag
, const char *bp
, int bytes
)
152 if (output_desc
>= 0 && close (output_desc
) < 0)
153 error (EXIT_FAILURE
, errno
, "%s", outfile
);
157 fprintf (stderr
, _("creating file `%s'\n"), outfile
);
158 output_desc
= open (outfile
,
159 O_WRONLY
| O_CREAT
| O_TRUNC
| O_BINARY
, 0666);
161 error (EXIT_FAILURE
, errno
, "%s", outfile
);
163 if (full_write (output_desc
, bp
, bytes
) < 0)
164 error (EXIT_FAILURE
, errno
, "%s", outfile
);
167 /* Read up to NCHARS bytes from the input file into BUF.
168 Return the number of bytes successfully read.
169 If this is less than NCHARS, do not call `stdread' again. */
172 stdread (char *buf
, int nchars
)
175 int to_be_read
= nchars
;
179 n_read
= safe_read (input_desc
, buf
, to_be_read
);
184 to_be_read
-= n_read
;
187 return nchars
- to_be_read
;
190 /* Split into pieces of exactly NCHARS bytes; the final piece may be
191 shorter. Use buffer BUF, whose size is BUFSIZE. */
194 bytes_split (int nchars
, char *buf
, int bufsize
)
197 int new_file_flag
= 1;
199 int to_write
= nchars
;
204 n_read
= stdread (buf
, bufsize
);
206 error (EXIT_FAILURE
, errno
, "%s", infile
);
211 if (to_read
< to_write
)
213 if (to_read
) /* do not write 0 bytes! */
215 cwrite (new_file_flag
, bp_out
, to_read
);
223 cwrite (new_file_flag
, bp_out
, to_write
);
231 while (n_read
== bufsize
);
234 /* Split into pieces of exactly NLINES lines; the final piece may have
235 fewer. Use buffer BUF, whose size is BUFSIZE. */
238 lines_split (int nlines
, char *buf
, int bufsize
)
241 char *bp
, *bp_out
, *eob
;
242 int new_file_flag
= 1;
247 n_read
= stdread (buf
, bufsize
);
249 error (EXIT_FAILURE
, errno
, "%s", infile
);
255 while (*bp
++ != '\n')
256 ; /* this semicolon takes most of the time */
259 if (eob
!= bp_out
) /* do not write 0 bytes! */
261 cwrite (new_file_flag
, bp_out
, eob
- bp_out
);
269 cwrite (new_file_flag
, bp_out
, bp
- bp_out
);
276 while (n_read
== bufsize
);
279 /* Split into pieces that are as large as possible while still not more
280 than NCHARS bytes, and are split on line boundaries except
281 where lines longer than NCHARS bytes occur. */
284 line_bytes_split (int nchars
)
290 char *buf
= (char *) xmalloc (nchars
);
294 /* Fill up the full buffer size from the input file. */
296 n_read
= stdread (buf
+ n_buffered
, nchars
- n_buffered
);
298 error (EXIT_FAILURE
, errno
, "%s", infile
);
300 n_buffered
+= n_read
;
301 if (n_buffered
!= nchars
)
304 /* Find where to end this chunk. */
305 bp
= buf
+ n_buffered
;
306 if (n_buffered
== nchars
)
308 while (bp
> buf
&& bp
[-1] != '\n')
312 /* If chunk has no newlines, use all the chunk. */
314 bp
= buf
+ n_buffered
;
316 /* Output the chars as one output file. */
317 cwrite (1, buf
, bp
- buf
);
319 /* Discard the chars we just output; move rest of chunk
320 down to be the start of the next chunk. Source and
321 destination may overlap, so memmove is used. */
322 n_buffered
-= bp
- buf
;
324 memmove (buf
, bp
, n_buffered
);
331 main (int argc
, char **argv
)
333 struct stat stat_buf
;
334 int num
; /* numeric argument from command line */
337 type_undef
, type_bytes
, type_byteslines
, type_lines
, type_digits
338 } split_type
= type_undef
;
339 int in_blk_size
; /* optimal block size of input file device */
340 char *buf
; /* file i/o buffer */
344 int digits_optind
= 0;
346 program_name
= argv
[0];
347 setlocale (LC_ALL
, "");
348 bindtextdomain (PACKAGE
, LOCALEDIR
);
349 textdomain (PACKAGE
);
351 /* Parse command line options. */
358 /* This is the argv-index of the option we will read next. */
359 int this_optind
= optind
? optind
: 1;
362 c
= getopt_long (argc
, argv
, "0123456789vb:l:C:", longopts
, (int *) 0);
372 if (split_type
!= type_undef
)
374 error (0, 0, _("cannot split in more than one way"));
375 usage (EXIT_FAILURE
);
377 split_type
= type_bytes
;
378 if (xstrtol (optarg
, NULL
, 10, &tmp_long
, "bkm") != LONGINT_OK
379 || tmp_long
< 0 || tmp_long
> INT_MAX
)
381 error (0, 0, _("%s: invalid number of bytes"), optarg
);
382 usage (EXIT_FAILURE
);
384 accum
= (int) tmp_long
;
388 if (split_type
!= type_undef
)
390 error (0, 0, _("cannot split in more than one way"));
391 usage (EXIT_FAILURE
);
393 split_type
= type_lines
;
394 if (xstrtol (optarg
, NULL
, 10, &tmp_long
, "") != LONGINT_OK
395 || tmp_long
< 0 || tmp_long
> INT_MAX
)
397 error (0, 0, _("%s: invalid number of lines"), optarg
);
398 usage (EXIT_FAILURE
);
400 accum
= (int) tmp_long
;
404 if (split_type
!= type_undef
)
406 error (0, 0, _("cannot split in more than one way"));
407 usage (EXIT_FAILURE
);
410 split_type
= type_byteslines
;
411 if (xstrtol (optarg
, NULL
, 10, &tmp_long
, "bkm") != LONGINT_OK
412 || tmp_long
< 0 || tmp_long
> INT_MAX
)
414 error (0, 0, _("%s: invalid number of bytes"), optarg
);
415 usage (EXIT_FAILURE
);
417 accum
= (int) tmp_long
;
430 if (split_type
!= type_undef
&& split_type
!= type_digits
)
432 error (0, 0, _("cannot split in more than one way"));
433 usage (EXIT_FAILURE
);
435 if (digits_optind
!= 0 && digits_optind
!= this_optind
)
436 accum
= 0; /* More than one number given; ignore other. */
437 digits_optind
= this_optind
;
438 split_type
= type_digits
;
439 accum
= accum
* 10 + c
- '0';
446 case_GETOPT_HELP_CHAR
;
448 case_GETOPT_VERSION_CHAR (PROGRAM_NAME
, AUTHORS
);
451 usage (EXIT_FAILURE
);
455 /* Handle default case. */
456 if (split_type
== type_undef
)
458 split_type
= type_lines
;
464 error (0, 0, _("invalid number"));
465 usage (EXIT_FAILURE
);
469 /* Get out the filename arguments. */
472 infile
= argv
[optind
++];
475 outbase
= argv
[optind
++];
479 error (0, 0, _("too many arguments"));
480 usage (EXIT_FAILURE
);
483 /* Open the input file. */
484 if (STREQ (infile
, "-"))
488 input_desc
= open (infile
, O_RDONLY
);
490 error (EXIT_FAILURE
, errno
, "%s", infile
);
492 /* Binary I/O is safer when bytecounts are used. */
493 SET_BINARY (input_desc
);
495 /* No output file is open now. */
498 /* Copy the output file prefix so we can add suffixes to it.
499 26**29 is certainly enough output files! */
501 outfile
= xmalloc (strlen (outbase
) + 30);
502 strcpy (outfile
, outbase
);
503 outfile_mid
= outfile
+ strlen (outfile
);
504 outfile_end
= outfile_mid
+ 2;
505 memset (outfile_mid
, 0, 30);
506 outfile_mid
[0] = 'a';
507 outfile_mid
[1] = 'a' - 1; /* first call to next_file_name makes it an 'a' */
509 /* Get the optimal block size of input device and make a buffer. */
511 if (fstat (input_desc
, &stat_buf
) < 0)
512 error (EXIT_FAILURE
, errno
, "%s", infile
);
513 in_blk_size
= ST_BLKSIZE (stat_buf
);
515 buf
= xmalloc (in_blk_size
+ 1);
521 lines_split (num
, buf
, in_blk_size
);
525 bytes_split (num
, buf
, in_blk_size
);
528 case type_byteslines
:
529 line_bytes_split (num
);
536 if (close (input_desc
) < 0)
537 error (EXIT_FAILURE
, errno
, "%s", infile
);
538 if (output_desc
>= 0 && close (output_desc
) < 0)
539 error (EXIT_FAILURE
, errno
, "%s", outfile
);