*** empty log message ***
[coreutils.git] / src / split.c
blobf4703952988e50f82a9eb86b29f2044aa6107791
1 /* split.c -- split a file into pieces.
2 Copyright (C) 88, 91, 1995-2002 Free Software Foundation, Inc.
4 This program is free software; you can redistribute it and/or modify
5 it under the terms of the GNU General Public License as published by
6 the Free Software Foundation; either version 2, or (at your option)
7 any later version.
9 This program is distributed in the hope that it will be useful,
10 but WITHOUT ANY WARRANTY; without even the implied warranty of
11 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 GNU General Public License for more details.
14 You should have received a copy of the GNU General Public License
15 along with this program; if not, write to the Free Software Foundation,
16 Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. */
18 /* By tege@sics.se, with rms.
20 To do:
21 * Implement -t CHAR or -t REGEX to specify break characters other
22 than newline. */
24 #include <config.h>
26 #include <stdio.h>
27 #include <getopt.h>
28 #include <sys/types.h>
30 #include "system.h"
31 #include "closeout.h"
32 #include "dirname.h"
33 #include "error.h"
34 #include "full-write.h"
35 #include "posixver.h"
36 #include "safe-read.h"
37 #include "xstrtol.h"
39 /* The official name of this program (e.g., no `g' prefix). */
40 #define PROGRAM_NAME "split"
42 #define AUTHORS N_ ("Torbjorn Granlund and Richard M. Stallman")
44 #define DEFAULT_SUFFIX_LENGTH 2
46 /* The name this program was run with. */
47 char *program_name;
49 /* Base name of output files. */
50 static char const *outbase;
52 /* Name of output files. */
53 static char *outfile;
55 /* Pointer to the end of the prefix in OUTFILE.
56 Suffixes are inserted here. */
57 static char *outfile_mid;
59 /* Length of OUTFILE's suffix. */
60 static size_t suffix_length = DEFAULT_SUFFIX_LENGTH;
62 /* Name of input file. May be "-". */
63 static char *infile;
65 /* Descriptor on which input file is open. */
66 static int input_desc;
68 /* Descriptor on which output file is open. */
69 static int output_desc;
71 /* If nonzero, print a diagnostic on standard error just before each
72 output file is opened. */
73 static int verbose;
75 static struct option const longopts[] =
77 {"bytes", required_argument, NULL, 'b'},
78 {"lines", required_argument, NULL, 'l'},
79 {"line-bytes", required_argument, NULL, 'C'},
80 {"suffix-length", required_argument, NULL, 'a'},
81 {"verbose", no_argument, NULL, 2},
82 {GETOPT_HELP_OPTION_DECL},
83 {GETOPT_VERSION_OPTION_DECL},
84 {NULL, 0, NULL, 0}
87 void
88 usage (int status)
90 if (status != 0)
91 fprintf (stderr, _("Try `%s --help' for more information.\n"),
92 program_name);
93 else
95 printf (_("\
96 Usage: %s [OPTION] [INPUT [PREFIX]]\n\
97 "),
98 program_name);
99 fputs (_("\
100 Output fixed-size pieces of INPUT to PREFIXaa, PREFIXab, ...; default\n\
101 PREFIX is `x'. With no INPUT, or when INPUT is -, read standard input.\n\
103 "), stdout);
104 fputs (_("\
105 Mandatory arguments to long options are mandatory for short options too.\n\
106 "), stdout);
107 fprintf (stdout, _("\
108 -a, --suffix-length=N use suffixes of length N (default %d)\n\
109 -b, --bytes=SIZE put SIZE bytes per output file\n\
110 -C, --line-bytes=SIZE put at most SIZE bytes of lines per output file\n\
111 -l, --lines=NUMBER put NUMBER lines per output file\n\
112 "), DEFAULT_SUFFIX_LENGTH);
113 fputs (_("\
114 --verbose print a diagnostic to standard error just\n\
115 before each output file is opened\n\
116 "), stdout);
117 fputs (HELP_OPTION_DESCRIPTION, stdout);
118 fputs (VERSION_OPTION_DESCRIPTION, stdout);
119 fputs (_("\
121 SIZE may have a multiplier suffix: b for 512, k for 1K, m for 1 Meg.\n\
122 "), stdout);
123 printf (_("\nReport bugs to <%s>.\n"), PACKAGE_BUGREPORT);
125 exit (status == 0 ? EXIT_SUCCESS : EXIT_FAILURE);
128 /* Compute the next sequential output file name and store it into the
129 string `outfile'. */
131 static void
132 next_file_name (void)
134 if (! outfile)
136 /* Allocate and initialize the first file name. */
138 size_t outbase_length = strlen (outbase);
139 size_t outfile_length = outbase_length + suffix_length;
140 if (outfile_length + 1 < outbase_length)
141 xalloc_die ();
142 outfile = xmalloc (outfile_length + 1);
143 outfile_mid = outfile + outbase_length;
144 memcpy (outfile, outbase, outbase_length);
145 memset (outfile_mid, 'a', suffix_length);
146 outfile[outfile_length] = 0;
148 #if ! _POSIX_NO_TRUNC && HAVE_PATHCONF && defined _PC_NAME_MAX
149 /* POSIX requires that if the output file name is too long for
150 its directory, `split' must fail without creating any files.
151 This must be checked for explicitly on operating systems that
152 silently truncate file names. */
154 char *dir = dir_name (outfile);
155 long name_max = pathconf (dir, _PC_NAME_MAX);
156 if (0 <= name_max && name_max < base_len (base_name (outfile)))
157 error (EXIT_FAILURE, ENAMETOOLONG, "%s", outfile);
158 free (dir);
160 #endif
162 else
164 /* Increment the suffix in place, if possible. */
166 char *p;
167 for (p = outfile_mid + suffix_length; outfile_mid < p; *--p = 'a')
168 if (p[-1]++ != 'z')
169 return;
170 error (EXIT_FAILURE, 0, _("Output file suffixes exhausted"));
174 /* Write BYTES bytes at BP to an output file.
175 If NEW_FILE_FLAG is nonzero, open the next output file.
176 Otherwise add to the same output file already in use. */
178 static void
179 cwrite (int new_file_flag, const char *bp, int bytes)
181 if (new_file_flag)
183 if (output_desc >= 0 && close (output_desc) < 0)
184 error (EXIT_FAILURE, errno, "%s", outfile);
186 next_file_name ();
187 if (verbose)
188 fprintf (stderr, _("creating file `%s'\n"), outfile);
189 output_desc = open (outfile,
190 O_WRONLY | O_CREAT | O_TRUNC | O_BINARY, 0666);
191 if (output_desc < 0)
192 error (EXIT_FAILURE, errno, "%s", outfile);
194 if (full_write (output_desc, bp, bytes) != bytes)
195 error (EXIT_FAILURE, errno, "%s", outfile);
198 /* Read NCHARS bytes from the input file into BUF.
199 Return the number of bytes successfully read.
200 If this is less than NCHARS, do not call `stdread' again. */
202 static int
203 stdread (char *buf, int nchars)
205 int n_read;
206 int to_be_read = nchars;
208 while (to_be_read)
210 n_read = safe_read (input_desc, buf, to_be_read);
211 if (n_read < 0)
212 return -1;
213 if (n_read == 0)
214 break;
215 to_be_read -= n_read;
216 buf += n_read;
218 return nchars - to_be_read;
221 /* Split into pieces of exactly NCHARS bytes.
222 Use buffer BUF, whose size is BUFSIZE. */
224 static void
225 bytes_split (int nchars, char *buf, int bufsize)
227 int n_read;
228 int new_file_flag = 1;
229 int to_read;
230 int to_write = nchars;
231 char *bp_out;
235 n_read = stdread (buf, bufsize);
236 if (n_read < 0)
237 error (EXIT_FAILURE, errno, "%s", infile);
238 bp_out = buf;
239 to_read = n_read;
240 for (;;)
242 if (to_read < to_write)
244 if (to_read) /* do not write 0 bytes! */
246 cwrite (new_file_flag, bp_out, to_read);
247 to_write -= to_read;
248 new_file_flag = 0;
250 break;
252 else
254 cwrite (new_file_flag, bp_out, to_write);
255 bp_out += to_write;
256 to_read -= to_write;
257 new_file_flag = 1;
258 to_write = nchars;
262 while (n_read == bufsize);
265 /* Split into pieces of exactly NLINES lines.
266 Use buffer BUF, whose size is BUFSIZE. */
268 static void
269 lines_split (int nlines, char *buf, int bufsize)
271 int n_read;
272 char *bp, *bp_out, *eob;
273 int new_file_flag = 1;
274 int n = 0;
278 n_read = stdread (buf, bufsize);
279 if (n_read < 0)
280 error (EXIT_FAILURE, errno, "%s", infile);
281 bp = bp_out = buf;
282 eob = bp + n_read;
283 *eob = '\n';
284 for (;;)
286 while (*bp++ != '\n')
287 ; /* this semicolon takes most of the time */
288 if (bp > eob)
290 if (eob != bp_out) /* do not write 0 bytes! */
292 cwrite (new_file_flag, bp_out, eob - bp_out);
293 new_file_flag = 0;
295 break;
297 else
298 if (++n >= nlines)
300 cwrite (new_file_flag, bp_out, bp - bp_out);
301 bp_out = bp;
302 new_file_flag = 1;
303 n = 0;
307 while (n_read == bufsize);
310 /* Split into pieces that are as large as possible while still not more
311 than NCHARS bytes, and are split on line boundaries except
312 where lines longer than NCHARS bytes occur. */
314 static void
315 line_bytes_split (int nchars)
317 int n_read;
318 char *bp;
319 int eof = 0;
320 int n_buffered = 0;
321 char *buf = (char *) xmalloc (nchars);
325 /* Fill up the full buffer size from the input file. */
327 n_read = stdread (buf + n_buffered, nchars - n_buffered);
328 if (n_read < 0)
329 error (EXIT_FAILURE, errno, "%s", infile);
331 n_buffered += n_read;
332 if (n_buffered != nchars)
333 eof = 1;
335 /* Find where to end this chunk. */
336 bp = buf + n_buffered;
337 if (n_buffered == nchars)
339 while (bp > buf && bp[-1] != '\n')
340 bp--;
343 /* If chunk has no newlines, use all the chunk. */
344 if (bp == buf)
345 bp = buf + n_buffered;
347 /* Output the chars as one output file. */
348 cwrite (1, buf, bp - buf);
350 /* Discard the chars we just output; move rest of chunk
351 down to be the start of the next chunk. Source and
352 destination probably overlap. */
353 n_buffered -= bp - buf;
354 if (n_buffered > 0)
355 memmove (buf, bp, n_buffered);
357 while (!eof);
358 free (buf);
362 main (int argc, char **argv)
364 struct stat stat_buf;
365 int num; /* numeric argument from command line */
366 enum
368 type_undef, type_bytes, type_byteslines, type_lines, type_digits
369 } split_type = type_undef;
370 int in_blk_size; /* optimal block size of input file device */
371 char *buf; /* file i/o buffer */
372 int accum = 0;
373 int c;
374 int digits_optind = 0;
376 program_name = argv[0];
377 setlocale (LC_ALL, "");
378 bindtextdomain (PACKAGE, LOCALEDIR);
379 textdomain (PACKAGE);
381 atexit (close_stdout);
383 /* Parse command line options. */
385 infile = "-";
386 outbase = "x";
388 while (1)
390 /* This is the argv-index of the option we will read next. */
391 int this_optind = optind ? optind : 1;
392 long int tmp_long;
394 c = getopt_long (argc, argv, "0123456789C:a:b:l:", longopts, NULL);
395 if (c == -1)
396 break;
398 switch (c)
400 case 0:
401 break;
403 case 'a':
404 if (xstrtol (optarg, NULL, 10, &tmp_long, "") != LONGINT_OK
405 || tmp_long < 0 || tmp_long > SIZE_MAX)
407 error (0, 0, _("%s: invalid suffix length"), optarg);
408 usage (EXIT_FAILURE);
410 suffix_length = tmp_long;
411 break;
413 case 'b':
414 if (split_type != type_undef)
416 error (0, 0, _("cannot split in more than one way"));
417 usage (EXIT_FAILURE);
419 split_type = type_bytes;
420 if (xstrtol (optarg, NULL, 10, &tmp_long, "bkm") != LONGINT_OK
421 || tmp_long < 0 || tmp_long > INT_MAX)
423 error (0, 0, _("%s: invalid number of bytes"), optarg);
424 usage (EXIT_FAILURE);
426 accum = (int) tmp_long;
427 break;
429 case 'l':
430 if (split_type != type_undef)
432 error (0, 0, _("cannot split in more than one way"));
433 usage (EXIT_FAILURE);
435 split_type = type_lines;
436 if (xstrtol (optarg, NULL, 10, &tmp_long, "") != LONGINT_OK
437 || tmp_long < 0 || tmp_long > INT_MAX)
439 error (0, 0, _("%s: invalid number of lines"), optarg);
440 usage (EXIT_FAILURE);
442 accum = (int) tmp_long;
443 break;
445 case 'C':
446 if (split_type != type_undef)
448 error (0, 0, _("cannot split in more than one way"));
449 usage (EXIT_FAILURE);
452 split_type = type_byteslines;
453 if (xstrtol (optarg, NULL, 10, &tmp_long, "bkm") != LONGINT_OK
454 || tmp_long < 0 || tmp_long > INT_MAX)
456 error (0, 0, _("%s: invalid number of bytes"), optarg);
457 usage (EXIT_FAILURE);
459 accum = (int) tmp_long;
460 break;
462 case '0':
463 case '1':
464 case '2':
465 case '3':
466 case '4':
467 case '5':
468 case '6':
469 case '7':
470 case '8':
471 case '9':
472 if (split_type != type_undef && split_type != type_digits)
474 error (0, 0, _("cannot split in more than one way"));
475 usage (EXIT_FAILURE);
477 if (digits_optind != 0 && digits_optind != this_optind)
478 accum = 0; /* More than one number given; ignore other. */
479 digits_optind = this_optind;
480 split_type = type_digits;
481 accum = accum * 10 + c - '0';
482 break;
484 case 2:
485 verbose = 1;
486 break;
488 case_GETOPT_HELP_CHAR;
490 case_GETOPT_VERSION_CHAR (PROGRAM_NAME, AUTHORS);
492 default:
493 usage (EXIT_FAILURE);
497 if (digits_optind && 200112 <= posix2_version ())
499 error (0, 0, _("`-%d' option is obsolete; use `-l %d'"), accum, accum);
500 usage (EXIT_FAILURE);
503 /* Handle default case. */
504 if (split_type == type_undef)
506 split_type = type_lines;
507 accum = 1000;
510 if (accum < 1)
512 error (0, 0, _("invalid number"));
513 usage (EXIT_FAILURE);
515 num = accum;
517 /* Get out the filename arguments. */
519 if (optind < argc)
520 infile = argv[optind++];
522 if (optind < argc)
523 outbase = argv[optind++];
525 if (optind < argc)
527 error (0, 0, _("too many arguments"));
528 usage (EXIT_FAILURE);
531 /* Open the input file. */
532 if (STREQ (infile, "-"))
533 input_desc = 0;
534 else
536 input_desc = open (infile, O_RDONLY);
537 if (input_desc < 0)
538 error (EXIT_FAILURE, errno, "%s", infile);
540 /* Binary I/O is safer when bytecounts are used. */
541 SET_BINARY (input_desc);
543 /* No output file is open now. */
544 output_desc = -1;
546 /* Get the optimal block size of input device and make a buffer. */
548 if (fstat (input_desc, &stat_buf) < 0)
549 error (EXIT_FAILURE, errno, "%s", infile);
550 in_blk_size = ST_BLKSIZE (stat_buf);
552 buf = xmalloc (in_blk_size + 1);
554 switch (split_type)
556 case type_digits:
557 case type_lines:
558 lines_split (num, buf, in_blk_size);
559 break;
561 case type_bytes:
562 bytes_split (num, buf, in_blk_size);
563 break;
565 case type_byteslines:
566 line_bytes_split (num);
567 break;
569 default:
570 abort ();
573 if (close (input_desc) < 0)
574 error (EXIT_FAILURE, errno, "%s", infile);
575 if (output_desc >= 0 && close (output_desc) < 0)
576 error (EXIT_FAILURE, errno, "%s", outfile);
578 exit (EXIT_SUCCESS);