Add tests with filenames containing newline and backslash characters.
[coreutils.git] / src / split.c
blobf6caacc8de8b2a50078d25f6cc3fd01e55bc2c64
1 /* split.c -- split a file into pieces.
2 Copyright (C) 88, 91, 95, 96, 1997, 1998 Free Software Foundation, Inc.
4 This program is free software; you can redistribute it and/or modify
5 it under the terms of the GNU General Public License as published by
6 the Free Software Foundation; either version 2, or (at your option)
7 any later version.
9 This program is distributed in the hope that it will be useful,
10 but WITHOUT ANY WARRANTY; without even the implied warranty of
11 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 GNU General Public License for more details.
14 You should have received a copy of the GNU General Public License
15 along with this program; if not, write to the Free Software Foundation,
16 Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. */
18 /* By tege@sics.se, with rms.
20 To do:
21 * Implement -t CHAR or -t REGEX to specify break characters other
22 than newline. */
24 #include <config.h>
26 #include <stdio.h>
27 #include <getopt.h>
28 #include <sys/types.h>
30 #include "system.h"
31 #include "error.h"
32 #include "xstrtol.h"
33 #include "safe-read.h"
/* Writes a whole buffer to a descriptor.  Not defined in this file as
   shown; it is declared without a prototype, so the compiler does not
   check the argument types at the call site.  */
int full_write ();

/* Name under which the program was invoked; main sets it from argv[0].  */
char *program_name;

/* Input side: the file name as given on the command line ("-" selects
   standard input) and the descriptor the data is read from.  */
static char *infile;
static int input_desc;

/* Output side.  OUTFILE holds the current output file name: the user
   prefix followed by a generated suffix.  OUTFILE_MID starts at the end
   of the prefix and OUTFILE_END at the end of the whole name; both are
   moved by next_file_name when the suffix has to grow.  OUTPUT_DESC is
   the descriptor of the output file currently being written, or -1
   while none is open.  */
static char *outfile;
static char *outfile_mid;
static char *outfile_end;
static int output_desc;

/* Option flags.  SHOW_HELP and SHOW_VERSION request the usage text or
   the version string instead of a split; VERBOSE makes cwrite announce
   each output file on standard error just before it is opened.  */
static int show_help;
static int show_version;
static int verbose;
69 static struct option const longopts[] =
71 {"bytes", required_argument, NULL, 'b'},
72 {"lines", required_argument, NULL, 'l'},
73 {"line-bytes", required_argument, NULL, 'C'},
74 {"verbose", no_argument, NULL, 2},
75 {"help", no_argument, &show_help, 1},
76 {"version", no_argument, &show_version, 1},
77 {NULL, 0, NULL, 0}
/* Print usage information and terminate the program.
   When STATUS is nonzero, only a one-line hint pointing at --help is
   written to standard error.  When STATUS is zero, the synopsis, the
   option summary and the bug-report address are written to standard
   output.  The process then exits with EXIT_SUCCESS if STATUS is zero
   and EXIT_FAILURE otherwise, so this function never returns.
   NOTE(review): in this listing the brace-only lines are absent and the
   column spacing inside the help text looks collapsed; compare with the
   upstream file before touching the strings.  */
80 static void
81 usage (int status)
83 if (status != 0)
84 fprintf (stderr, _("Try `%s --help' for more information.\n"),
85 program_name);
86 else
88 printf (_("\
89 Usage: %s [OPTION] [INPUT [PREFIX]]\n\
90 "),
91 program_name);
92 printf (_("\
93 Output fixed-size pieces of INPUT to PREFIXaa, PREFIXab, ...; default\n\
94 PREFIX is `x'. With no INPUT, or when INPUT is -, read standard input.\n\
95 \n\
96 -b, --bytes=SIZE put SIZE bytes per output file\n\
97 -C, --line-bytes=SIZE put at most SIZE bytes of lines per output file\n\
98 -l, --lines=NUMBER put NUMBER lines per output file\n\
99 -NUMBER same as -l NUMBER\n\
100 --verbose print a diagnostic to standard error just\n\
101 before each output file is opened\n\
102 --help display this help and exit\n\
103 --version output version information and exit\n\
105 SIZE may have a multiplier suffix: b for 512, k for 1K, m for 1 Meg.\n\
106 "));
107 puts (_("\nReport bugs to <bug-textutils@gnu.org>."));
109 exit (status == 0 ? EXIT_SUCCESS : EXIT_FAILURE);
112 /* Compute the next sequential output file name suffix and store it
113 into the string `outfile' at the position pointed to by `outfile_mid'. */
115 static void
116 next_file_name (void)
118 static unsigned n_digits = 2;
119 char *p;
121 /* Change any suffix of `z's to `a's. */
122 for (p = outfile_end - 1; *p == 'z'; p--)
124 *p = 'a';
127 /* Increment the rightmost non-`z' character that was present before the
128 above z/a substitutions. There is guaranteed to be such a character. */
129 ++(*p);
131 /* If the result of that increment operation yielded a `z' and there
132 are only `z's to the left of it, then append two more `a' characters
133 to the end and add 1 (-1 + 2) to the number of digits (we're taking
134 out this `z' and adding two `a's). */
135 if (*p == 'z' && p == outfile_mid)
137 ++n_digits;
138 ++outfile_mid;
139 *outfile_end++ = 'a';
140 *outfile_end++ = 'a';
144 /* Write BYTES bytes at BP to an output file.
145 If NEW_FILE_FLAG is nonzero, open the next output file.
146 Otherwise add to the same output file already in use. */
148 static void
149 cwrite (int new_file_flag, const char *bp, int bytes)
151 if (new_file_flag)
153 if (output_desc >= 0 && close (output_desc) < 0)
154 error (EXIT_FAILURE, errno, "%s", outfile);
156 next_file_name ();
157 if (verbose)
158 fprintf (stderr, _("creating file `%s'\n"), outfile);
159 output_desc = open (outfile, O_WRONLY | O_CREAT | O_TRUNC, 0666);
160 if (output_desc < 0)
161 error (EXIT_FAILURE, errno, "%s", outfile);
163 if (full_write (output_desc, bp, bytes) < 0)
164 error (EXIT_FAILURE, errno, "%s", outfile);
167 /* Read NCHARS bytes from the input file into BUF.
168 Return the number of bytes successfully read.
169 If this is less than NCHARS, do not call `stdread' again. */
171 static int
172 stdread (char *buf, int nchars)
174 int n_read;
175 int to_be_read = nchars;
177 while (to_be_read)
179 n_read = safe_read (input_desc, buf, to_be_read);
180 if (n_read < 0)
181 return -1;
182 if (n_read == 0)
183 break;
184 to_be_read -= n_read;
185 buf += n_read;
187 return nchars - to_be_read;
190 /* Split into pieces of exactly NCHARS bytes.
191 Use buffer BUF, whose size is BUFSIZE. */
193 static void
194 bytes_split (int nchars, char *buf, int bufsize)
196 int n_read;
197 int new_file_flag = 1;
198 int to_read;
199 int to_write = nchars;
200 char *bp_out;
204 n_read = stdread (buf, bufsize);
205 if (n_read < 0)
206 error (EXIT_FAILURE, errno, "%s", infile);
207 bp_out = buf;
208 to_read = n_read;
209 for (;;)
211 if (to_read < to_write)
213 if (to_read) /* do not write 0 bytes! */
215 cwrite (new_file_flag, bp_out, to_read);
216 to_write -= to_read;
217 new_file_flag = 0;
219 break;
221 else
223 cwrite (new_file_flag, bp_out, to_write);
224 bp_out += to_write;
225 to_read -= to_write;
226 new_file_flag = 1;
227 to_write = nchars;
231 while (n_read == bufsize);
234 /* Split into pieces of exactly NLINES lines.
235 Use buffer BUF, whose size is BUFSIZE. */
237 static void
238 lines_split (int nlines, char *buf, int bufsize)
240 int n_read;
241 char *bp, *bp_out, *eob;
242 int new_file_flag = 1;
243 int n = 0;
247 n_read = stdread (buf, bufsize);
248 if (n_read < 0)
249 error (EXIT_FAILURE, errno, "%s", infile);
250 bp = bp_out = buf;
251 eob = bp + n_read;
252 *eob = '\n';
253 for (;;)
255 while (*bp++ != '\n')
256 ; /* this semicolon takes most of the time */
257 if (bp > eob)
259 if (eob != bp_out) /* do not write 0 bytes! */
261 cwrite (new_file_flag, bp_out, eob - bp_out);
262 new_file_flag = 0;
264 break;
266 else
267 if (++n >= nlines)
269 cwrite (new_file_flag, bp_out, bp - bp_out);
270 bp_out = bp;
271 new_file_flag = 1;
272 n = 0;
276 while (n_read == bufsize);
279 /* Split into pieces that are as large as possible while still not more
280 than NCHARS bytes, and are split on line boundaries except
281 where lines longer than NCHARS bytes occur. */
283 static void
284 line_bytes_split (int nchars)
286 int n_read;
287 char *bp;
288 int eof = 0;
289 int n_buffered = 0;
290 char *buf = (char *) xmalloc (nchars);
294 /* Fill up the full buffer size from the input file. */
296 n_read = stdread (buf + n_buffered, nchars - n_buffered);
297 if (n_read < 0)
298 error (EXIT_FAILURE, errno, "%s", infile);
300 n_buffered += n_read;
301 if (n_buffered != nchars)
302 eof = 1;
304 /* Find where to end this chunk. */
305 bp = buf + n_buffered;
306 if (n_buffered == nchars)
308 while (bp > buf && bp[-1] != '\n')
309 bp--;
312 /* If chunk has no newlines, use all the chunk. */
313 if (bp == buf)
314 bp = buf + n_buffered;
316 /* Output the chars as one output file. */
317 cwrite (1, buf, bp - buf);
319 /* Discard the chars we just output; move rest of chunk
320 down to be the start of the next chunk. Source and
321 destination probably overlap. */
322 n_buffered -= bp - buf;
323 if (n_buffered > 0)
324 memmove (buf, bp, n_buffered);
326 while (!eof);
327 free (buf);
331 main (int argc, char **argv)
333 struct stat stat_buf;
334 int num; /* numeric argument from command line */
335 enum
337 type_undef, type_bytes, type_byteslines, type_lines, type_digits
338 } split_type = type_undef;
339 int in_blk_size; /* optimal block size of input file device */
340 char *buf; /* file i/o buffer */
341 int accum = 0;
342 char *outbase;
343 int c;
344 int digits_optind = 0;
346 program_name = argv[0];
347 setlocale (LC_ALL, "");
348 bindtextdomain (PACKAGE, LOCALEDIR);
349 textdomain (PACKAGE);
351 /* Parse command line options. */
353 infile = "-";
354 outbase = "x";
356 while (1)
358 /* This is the argv-index of the option we will read next. */
359 int this_optind = optind ? optind : 1;
360 long int tmp_long;
362 c = getopt_long (argc, argv, "0123456789vb:l:C:", longopts, (int *) 0);
363 if (c == EOF)
364 break;
366 switch (c)
368 case 0:
369 break;
371 case 'b':
372 if (split_type != type_undef)
374 error (0, 0, _("cannot split in more than one way"));
375 usage (EXIT_FAILURE);
377 split_type = type_bytes;
378 if (xstrtol (optarg, NULL, 10, &tmp_long, "bkm") != LONGINT_OK
379 || tmp_long < 0 || tmp_long > INT_MAX)
381 error (0, 0, _("%s: invalid number of bytes"), optarg);
382 usage (EXIT_FAILURE);
384 accum = (int) tmp_long;
385 break;
387 case 'l':
388 if (split_type != type_undef)
390 error (0, 0, _("cannot split in more than one way"));
391 usage (EXIT_FAILURE);
393 split_type = type_lines;
394 if (xstrtol (optarg, NULL, 10, &tmp_long, "") != LONGINT_OK
395 || tmp_long < 0 || tmp_long > INT_MAX)
397 error (0, 0, _("%s: invalid number of lines"), optarg);
398 usage (EXIT_FAILURE);
400 accum = (int) tmp_long;
401 break;
403 case 'C':
404 if (split_type != type_undef)
406 error (0, 0, _("cannot split in more than one way"));
407 usage (EXIT_FAILURE);
410 split_type = type_byteslines;
411 if (xstrtol (optarg, NULL, 10, &tmp_long, "bkm") != LONGINT_OK
412 || tmp_long < 0 || tmp_long > INT_MAX)
414 error (0, 0, _("%s: invalid number of bytes"), optarg);
415 usage (EXIT_FAILURE);
417 accum = (int) tmp_long;
418 break;
420 case '0':
421 case '1':
422 case '2':
423 case '3':
424 case '4':
425 case '5':
426 case '6':
427 case '7':
428 case '8':
429 case '9':
430 if (split_type != type_undef && split_type != type_digits)
432 error (0, 0, _("cannot split in more than one way"));
433 usage (EXIT_FAILURE);
435 if (digits_optind != 0 && digits_optind != this_optind)
436 accum = 0; /* More than one number given; ignore other. */
437 digits_optind = this_optind;
438 split_type = type_digits;
439 accum = accum * 10 + c - '0';
440 break;
442 case 2:
443 verbose = 1;
444 break;
446 default:
447 usage (EXIT_FAILURE);
451 if (show_version)
453 printf ("split (%s) %s\n", GNU_PACKAGE, VERSION);
454 exit (EXIT_SUCCESS);
457 if (show_help)
458 usage (0);
460 /* Handle default case. */
461 if (split_type == type_undef)
463 split_type = type_lines;
464 accum = 1000;
467 if (accum < 1)
469 error (0, 0, _("invalid number"));
470 usage (EXIT_FAILURE);
472 num = accum;
474 /* Get out the filename arguments. */
476 if (optind < argc)
477 infile = argv[optind++];
479 if (optind < argc)
480 outbase = argv[optind++];
482 if (optind < argc)
484 error (0, 0, _("too many arguments"));
485 usage (EXIT_FAILURE);
488 /* Open the input file. */
489 if (STREQ (infile, "-"))
490 input_desc = 0;
491 else
493 input_desc = open (infile, O_RDONLY);
494 if (input_desc < 0)
495 error (EXIT_FAILURE, errno, "%s", infile);
498 /* No output file is open now. */
499 output_desc = -1;
501 /* Copy the output file prefix so we can add suffixes to it.
502 26**29 is certainly enough output files! */
504 outfile = xmalloc (strlen (outbase) + 30);
505 strcpy (outfile, outbase);
506 outfile_mid = outfile + strlen (outfile);
507 outfile_end = outfile_mid + 2;
508 memset (outfile_mid, 0, 30);
509 outfile_mid[0] = 'a';
510 outfile_mid[1] = 'a' - 1; /* first call to next_file_name makes it an 'a' */
512 /* Get the optimal block size of input device and make a buffer. */
514 if (fstat (input_desc, &stat_buf) < 0)
515 error (EXIT_FAILURE, errno, "%s", infile);
516 in_blk_size = ST_BLKSIZE (stat_buf);
518 buf = xmalloc (in_blk_size + 1);
520 switch (split_type)
522 case type_digits:
523 case type_lines:
524 lines_split (num, buf, in_blk_size);
525 break;
527 case type_bytes:
528 bytes_split (num, buf, in_blk_size);
529 break;
531 case type_byteslines:
532 line_bytes_split (num);
533 break;
535 default:
536 abort ();
539 if (close (input_desc) < 0)
540 error (EXIT_FAILURE, errno, "%s", infile);
541 if (output_desc >= 0 && close (output_desc) < 0)
542 error (EXIT_FAILURE, errno, "%s", outfile);
544 exit (EXIT_SUCCESS);