/* Source: coreutils.git / src / split.c
   blob bdc89e5519dddaa40c22f2101e0ffac6cafc4819  */
/* split.c -- split a file into pieces.
   Copyright (C) 1988, 1991, 1995 Free Software Foundation, Inc.

   This program is free software; you can redistribute it and/or modify
   it under the terms of the GNU General Public License as published by
   the Free Software Foundation; either version 2, or (at your option)
   any later version.

   This program is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
   GNU General Public License for more details.

   You should have received a copy of the GNU General Public License
   along with this program; if not, write to the Free Software
   Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.  */
/* By tege@sics.se, with rms.

   To do:
   * Implement -t CHAR or -t REGEX to specify break characters other
     than newline.  */
24 #include <config.h>
26 #include <stdio.h>
27 #include <getopt.h>
28 #include <sys/types.h>
30 #if HAVE_LIMITS_H
31 # include <limits.h>
32 #endif
34 #ifndef UINT_MAX
35 # define UINT_MAX ((unsigned int) ~(unsigned int) 0)
36 #endif
38 #ifndef INT_MAX
39 # define INT_MAX ((int) (UINT_MAX >> 1))
40 #endif
42 #include "system.h"
43 #include "version.h"
44 #include "error.h"
45 #include "xstrtol.h"
/* Helper routines that are not defined in the visible part of this
   file.  They are declared without prototypes, so the compiler does
   not check the arguments passed to them.  */
char *xmalloc ();
int full_write ();
int safe_read ();

/* The name this program was run with.  */
char *program_name;

/* Base name of output files.  */
static char *outfile;

/* Pointer to the end of the prefix in OUTFILE.
   Suffixes are inserted here.  */
static char *outfile_mid;

/* Pointer to the end of OUTFILE.  */
static char *outfile_end;

/* Name of input file.  May be "-".  */
static char *infile;

/* Descriptor on which input file is open.  */
static int input_desc;

/* Descriptor on which output file is open.  */
static int output_desc;

/* If nonzero, display usage information and exit.  */
static int show_help;

/* If nonzero, print the version on standard output then exit.  */
static int show_version;

/* If nonzero, print a diagnostic on standard error just before each
   output file is opened.  */
static int verbose;
83 static struct option const longopts[] =
85 {"bytes", required_argument, NULL, 'b'},
86 {"lines", required_argument, NULL, 'l'},
87 {"line-bytes", required_argument, NULL, 'C'},
88 {"verbose", no_argument, NULL, 2},
89 {"help", no_argument, &show_help, 1},
90 {"version", no_argument, &show_version, 1},
91 {NULL, 0, NULL, 0}
94 static void
95 usage (int status, const char *reason)
97 if (reason != NULL)
98 fprintf (stderr, "%s: %s\n", program_name, reason);
100 if (status != 0)
101 fprintf (stderr, _("Try `%s --help' for more information.\n"),
102 program_name);
103 else
105 printf (_("\
106 Usage: %s [OPTION] [INPUT [PREFIX]]\n\
108 program_name);
109 printf (_("\
110 Output fixed-size pieces of INPUT to PREFIXaa, PREFIXab, ...; default\n\
111 PREFIX is `x'. With no INPUT, or when INPUT is -, read standard input.\n\
113 -C, --line-bytes=SIZE put at most SIZE bytes of lines per output file\n\
114 -b, --bytes=SIZE put SIZE bytes per output file\n\
115 -l, --lines=NUMBER put NUMBER lines per output file\n\
116 --verbose print a diagnostic to standard error just\n\
117 before each output file is opened\n\
118 -NUMBER same as -l NUMBER\n\
119 --help display this help and exit\n\
120 --version output version information and exit\n\
122 SIZE may have a multiplier suffix: b for 512, k for 1K, m for 1 Meg.\n\
123 "));
125 exit (status);
128 /* Compute the next sequential output file name suffix and store it
129 into the string `outfile' at the position pointed to by `outfile_mid'. */
131 static void
132 next_file_name (void)
134 int x;
135 char *ne;
136 unsigned int i;
138 static int first_call = 1;
140 /* Status for outfile name generation. */
141 static unsigned outfile_count = 0;
142 static unsigned outfile_name_limit = 25 * 26;
143 static unsigned outfile_name_generation = 1;
145 if (!first_call)
146 outfile_count++;
147 first_call = 0;
148 if (outfile_count < outfile_name_limit)
150 for (ne = outfile_end - 1; ; ne--)
152 x = *ne;
153 if (x != 'z')
154 break;
155 *ne = 'a';
157 *ne = x + 1;
158 return;
161 outfile_count = 0;
162 outfile_name_limit *= 26;
163 outfile_name_generation++;
164 *outfile_mid++ = 'z';
165 for (i = 0; i <= outfile_name_generation; i++)
166 outfile_mid[i] = 'a';
167 outfile_end += 2;
170 /* Write BYTES bytes at BP to an output file.
171 If NEW_FILE_FLAG is nonzero, open the next output file.
172 Otherwise add to the same output file already in use. */
174 static void
175 cwrite (int new_file_flag, const char *bp, int bytes)
177 if (new_file_flag)
179 if (output_desc >= 0 && close (output_desc) < 0)
180 error (1, errno, "%s", outfile);
182 next_file_name ();
183 if (verbose)
184 fprintf (stderr, _("creating file `%s'\n"), outfile);
185 output_desc = open (outfile, O_WRONLY | O_CREAT | O_TRUNC, 0666);
186 if (output_desc < 0)
187 error (1, errno, "%s", outfile);
189 if (full_write (output_desc, bp, bytes) < 0)
190 error (1, errno, "%s", outfile);
193 /* Read NCHARS bytes from the input file into BUF.
194 Return the number of bytes successfully read.
195 If this is less than NCHARS, do not call `stdread' again. */
197 static int
198 stdread (char *buf, int nchars)
200 int n_read;
201 int to_be_read = nchars;
203 while (to_be_read)
205 n_read = safe_read (input_desc, buf, to_be_read);
206 if (n_read < 0)
207 return -1;
208 if (n_read == 0)
209 break;
210 to_be_read -= n_read;
211 buf += n_read;
213 return nchars - to_be_read;
216 /* Split into pieces of exactly NCHARS bytes.
217 Use buffer BUF, whose size is BUFSIZE. */
219 static void
220 bytes_split (int nchars, char *buf, int bufsize)
222 int n_read;
223 int new_file_flag = 1;
224 int to_read;
225 int to_write = nchars;
226 char *bp_out;
230 n_read = stdread (buf, bufsize);
231 if (n_read < 0)
232 error (1, errno, "%s", infile);
233 bp_out = buf;
234 to_read = n_read;
235 for (;;)
237 if (to_read < to_write)
239 if (to_read) /* do not write 0 bytes! */
241 cwrite (new_file_flag, bp_out, to_read);
242 to_write -= to_read;
243 new_file_flag = 0;
245 break;
247 else
249 cwrite (new_file_flag, bp_out, to_write);
250 bp_out += to_write;
251 to_read -= to_write;
252 new_file_flag = 1;
253 to_write = nchars;
257 while (n_read == bufsize);
260 /* Split into pieces of exactly NLINES lines.
261 Use buffer BUF, whose size is BUFSIZE. */
263 static void
264 lines_split (int nlines, char *buf, int bufsize)
266 int n_read;
267 char *bp, *bp_out, *eob;
268 int new_file_flag = 1;
269 int n = 0;
273 n_read = stdread (buf, bufsize);
274 if (n_read < 0)
275 error (1, errno, "%s", infile);
276 bp = bp_out = buf;
277 eob = bp + n_read;
278 *eob = '\n';
279 for (;;)
281 while (*bp++ != '\n')
282 ; /* this semicolon takes most of the time */
283 if (bp > eob)
285 if (eob != bp_out) /* do not write 0 bytes! */
287 cwrite (new_file_flag, bp_out, eob - bp_out);
288 new_file_flag = 0;
290 break;
292 else
293 if (++n >= nlines)
295 cwrite (new_file_flag, bp_out, bp - bp_out);
296 bp_out = bp;
297 new_file_flag = 1;
298 n = 0;
302 while (n_read == bufsize);
305 /* Split into pieces that are as large as possible while still not more
306 than NCHARS bytes, and are split on line boundaries except
307 where lines longer than NCHARS bytes occur. */
309 static void
310 line_bytes_split (int nchars)
312 int n_read;
313 char *bp;
314 int eof = 0;
315 int n_buffered = 0;
316 char *buf = (char *) xmalloc (nchars);
320 /* Fill up the full buffer size from the input file. */
322 n_read = stdread (buf + n_buffered, nchars - n_buffered);
323 if (n_read < 0)
324 error (1, errno, "%s", infile);
326 n_buffered += n_read;
327 if (n_buffered != nchars)
328 eof = 1;
330 /* Find where to end this chunk. */
331 bp = buf + n_buffered;
332 if (n_buffered == nchars)
334 while (bp > buf && bp[-1] != '\n')
335 bp--;
338 /* If chunk has no newlines, use all the chunk. */
339 if (bp == buf)
340 bp = buf + n_buffered;
342 /* Output the chars as one output file. */
343 cwrite (1, buf, bp - buf);
345 /* Discard the chars we just output; move rest of chunk
346 down to be the start of the next chunk. Source and
347 destination probably overlap. */
348 n_buffered -= bp - buf;
349 if (n_buffered > 0)
350 memmove (buf, bp, n_buffered);
352 while (!eof);
353 free (buf);
356 void
357 main (int argc, char **argv)
359 struct stat stat_buf;
360 int num; /* numeric argument from command line */
361 enum
363 type_undef, type_bytes, type_byteslines, type_lines, type_digits
364 } split_type = type_undef;
365 int in_blk_size; /* optimal block size of input file device */
366 char *buf; /* file i/o buffer */
367 int accum = 0;
368 char *outbase;
369 int c;
370 int digits_optind = 0;
372 program_name = argv[0];
373 setlocale (LC_ALL, "");
374 bindtextdomain (PACKAGE, LOCALEDIR);
375 textdomain (PACKAGE);
377 /* Parse command line options. */
379 infile = "-";
380 outbase = "x";
382 while (1)
384 /* This is the argv-index of the option we will read next. */
385 int this_optind = optind ? optind : 1;
386 long int tmp_long;
388 c = getopt_long (argc, argv, "0123456789vb:l:C:", longopts, (int *) 0);
389 if (c == EOF)
390 break;
392 switch (c)
394 case 0:
395 break;
397 case 'b':
398 if (split_type != type_undef)
399 usage (2, _("cannot split in more than one way"));
400 split_type = type_bytes;
401 if (xstrtol (optarg, NULL, 10, &tmp_long, "bkm") != LONGINT_OK
402 || tmp_long < 0 || tmp_long > INT_MAX)
403 usage (2, _("invalid number of bytes"));
404 accum = (int) tmp_long;
405 break;
407 case 'l':
408 if (split_type != type_undef)
409 usage (2, _("cannot split in more than one way"));
410 split_type = type_lines;
411 if (xstrtol (optarg, NULL, 10, &tmp_long, "") != LONGINT_OK
412 || tmp_long < 0 || tmp_long > INT_MAX)
413 usage (2, _("invalid number of lines"));
414 accum = (int) tmp_long;
415 break;
417 case 'C':
418 if (split_type != type_undef)
419 usage (2, _("cannot split in more than one way"));
420 split_type = type_byteslines;
421 if (xstrtol (optarg, NULL, 10, &tmp_long, "bkm") != LONGINT_OK
422 || tmp_long < 0 || tmp_long > INT_MAX)
423 usage (2, _("invalid number of bytes"));
424 accum = (int) tmp_long;
425 break;
427 case '0':
428 case '1':
429 case '2':
430 case '3':
431 case '4':
432 case '5':
433 case '6':
434 case '7':
435 case '8':
436 case '9':
437 if (split_type != type_undef && split_type != type_digits)
438 usage (2, _("cannot split in more than one way"));
439 if (digits_optind != 0 && digits_optind != this_optind)
440 accum = 0; /* More than one number given; ignore other. */
441 digits_optind = this_optind;
442 split_type = type_digits;
443 accum = accum * 10 + c - '0';
444 break;
446 case 2:
447 verbose = 1;
448 break;
450 default:
451 usage (2, (char *)0);
455 if (show_version)
457 printf ("split - %s\n", version_string);
458 exit (0);
461 if (show_help)
462 usage (0, (char *)0);
464 /* Handle default case. */
465 if (split_type == type_undef)
467 split_type = type_lines;
468 accum = 1000;
471 if (accum < 1)
472 usage (2, _("invalid number"));
473 num = accum;
475 /* Get out the filename arguments. */
477 if (optind < argc)
478 infile = argv[optind++];
480 if (optind < argc)
481 outbase = argv[optind++];
483 if (optind < argc)
484 usage (2, _("too many arguments"));
486 /* Open the input file. */
487 if (!strcmp (infile, "-"))
488 input_desc = 0;
489 else
491 input_desc = open (infile, O_RDONLY);
492 if (input_desc < 0)
493 error (1, errno, "%s", infile);
496 /* No output file is open now. */
497 output_desc = -1;
499 /* Copy the output file prefix so we can add suffixes to it.
500 26**29 is certainly enough output files! */
502 outfile = xmalloc (strlen (outbase) + 30);
503 strcpy (outfile, outbase);
504 outfile_mid = outfile + strlen (outfile);
505 outfile_end = outfile_mid + 2;
506 memset (outfile_mid, 0, 30);
507 outfile_mid[0] = 'a';
508 outfile_mid[1] = 'a' - 1; /* first call to next_file_name makes it an 'a' */
510 /* Get the optimal block size of input device and make a buffer. */
512 if (fstat (input_desc, &stat_buf) < 0)
513 error (1, errno, "%s", infile);
514 in_blk_size = ST_BLKSIZE (stat_buf);
516 buf = xmalloc (in_blk_size + 1);
518 switch (split_type)
520 case type_digits:
521 case type_lines:
522 lines_split (num, buf, in_blk_size);
523 break;
525 case type_bytes:
526 bytes_split (num, buf, in_blk_size);
527 break;
529 case type_byteslines:
530 line_bytes_split (num);
531 break;
533 default:
534 abort ();
537 if (close (input_desc) < 0)
538 error (1, errno, "%s", infile);
539 if (output_desc >= 0 && close (output_desc) < 0)
540 error (1, errno, "%s", outfile);
542 exit (0);