.
[coreutils.git] / src / split.c
blobc821921383d666f16987101ce46f5f066b09c83a
/* split.c -- split a file into pieces.
   Copyright (C) 88, 91, 95, 96, 1997, 1998 Free Software Foundation, Inc.

   This program is free software; you can redistribute it and/or modify
   it under the terms of the GNU General Public License as published by
   the Free Software Foundation; either version 2, or (at your option)
   any later version.

   This program is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
   GNU General Public License for more details.

   You should have received a copy of the GNU General Public License
   along with this program; if not, write to the Free Software Foundation,
   Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.  */
/* By tege@sics.se, with rms.

   To do:
   * Implement -t CHAR or -t REGEX to specify break characters other
     than newline.  */
24 #include <config.h>
26 #include <stdio.h>
27 #include <getopt.h>
28 #include <sys/types.h>
30 #include "system.h"
31 #include "error.h"
32 #include "xstrtol.h"
34 int full_write ();
35 int safe_read ();
37 /* The name this program was run with. */
38 char *program_name;
40 /* Base name of output files. */
41 static char *outfile;
43 /* Pointer to the end of the prefix in OUTFILE.
44 Suffixes are inserted here. */
45 static char *outfile_mid;
47 /* Pointer to the end of OUTFILE. */
48 static char *outfile_end;
50 /* Name of input file. May be "-". */
51 static char *infile;
53 /* Descriptor on which input file is open. */
54 static int input_desc;
56 /* Descriptor on which output file is open. */
57 static int output_desc;
59 /* If nonzero, display usage information and exit. */
60 static int show_help;
62 /* If nonzero, print the version on standard output then exit. */
63 static int show_version;
65 /* If nonzero, print a diagnostic on standard error just before each
66 output file is opened. */
67 static int verbose;
69 static struct option const longopts[] =
71 {"bytes", required_argument, NULL, 'b'},
72 {"lines", required_argument, NULL, 'l'},
73 {"line-bytes", required_argument, NULL, 'C'},
74 {"verbose", no_argument, NULL, 2},
75 {"help", no_argument, &show_help, 1},
76 {"version", no_argument, &show_version, 1},
77 {NULL, 0, NULL, 0}
80 static void
81 usage (int status)
83 if (status != 0)
84 fprintf (stderr, _("Try `%s --help' for more information.\n"),
85 program_name);
86 else
88 printf (_("\
89 Usage: %s [OPTION] [INPUT [PREFIX]]\n\
90 "),
91 program_name);
92 printf (_("\
93 Output fixed-size pieces of INPUT to PREFIXaa, PREFIXab, ...; default\n\
94 PREFIX is `x'. With no INPUT, or when INPUT is -, read standard input.\n\
95 \n\
96 -b, --bytes=SIZE put SIZE bytes per output file\n\
97 -C, --line-bytes=SIZE put at most SIZE bytes of lines per output file\n\
98 -l, --lines=NUMBER put NUMBER lines per output file\n\
99 -NUMBER same as -l NUMBER\n\
100 --verbose print a diagnostic to standard error just\n\
101 before each output file is opened\n\
102 --help display this help and exit\n\
103 --version output version information and exit\n\
105 SIZE may have a multiplier suffix: b for 512, k for 1K, m for 1 Meg.\n\
106 "));
107 puts (_("\nReport bugs to <textutils-bugs@gnu.org>."));
109 exit (status == 0 ? EXIT_SUCCESS : EXIT_FAILURE);
112 /* Compute the next sequential output file name suffix and store it
113 into the string `outfile' at the position pointed to by `outfile_mid'. */
115 static void
116 next_file_name (void)
118 int x;
119 char *ne;
120 unsigned int i;
122 static int first_call = 1;
124 /* Status for outfile name generation. */
125 static unsigned outfile_count = 0;
126 static unsigned outfile_name_limit = 25 * 26;
127 static unsigned outfile_name_generation = 1;
129 if (!first_call)
130 outfile_count++;
131 first_call = 0;
132 if (outfile_count < outfile_name_limit)
134 for (ne = outfile_end - 1; ; ne--)
136 x = *ne;
137 if (x != 'z')
138 break;
139 *ne = 'a';
141 *ne = x + 1;
142 return;
145 outfile_count = 0;
146 outfile_name_limit *= 26;
147 outfile_name_generation++;
148 *outfile_mid++ = 'z';
149 for (i = 0; i <= outfile_name_generation; i++)
150 outfile_mid[i] = 'a';
151 outfile_end += 2;
154 /* Write BYTES bytes at BP to an output file.
155 If NEW_FILE_FLAG is nonzero, open the next output file.
156 Otherwise add to the same output file already in use. */
158 static void
159 cwrite (int new_file_flag, const char *bp, int bytes)
161 if (new_file_flag)
163 if (output_desc >= 0 && close (output_desc) < 0)
164 error (EXIT_FAILURE, errno, "%s", outfile);
166 next_file_name ();
167 if (verbose)
168 fprintf (stderr, _("creating file `%s'\n"), outfile);
169 output_desc = open (outfile, O_WRONLY | O_CREAT | O_TRUNC, 0666);
170 if (output_desc < 0)
171 error (EXIT_FAILURE, errno, "%s", outfile);
173 if (full_write (output_desc, bp, bytes) < 0)
174 error (EXIT_FAILURE, errno, "%s", outfile);
177 /* Read NCHARS bytes from the input file into BUF.
178 Return the number of bytes successfully read.
179 If this is less than NCHARS, do not call `stdread' again. */
181 static int
182 stdread (char *buf, int nchars)
184 int n_read;
185 int to_be_read = nchars;
187 while (to_be_read)
189 n_read = safe_read (input_desc, buf, to_be_read);
190 if (n_read < 0)
191 return -1;
192 if (n_read == 0)
193 break;
194 to_be_read -= n_read;
195 buf += n_read;
197 return nchars - to_be_read;
200 /* Split into pieces of exactly NCHARS bytes.
201 Use buffer BUF, whose size is BUFSIZE. */
203 static void
204 bytes_split (int nchars, char *buf, int bufsize)
206 int n_read;
207 int new_file_flag = 1;
208 int to_read;
209 int to_write = nchars;
210 char *bp_out;
214 n_read = stdread (buf, bufsize);
215 if (n_read < 0)
216 error (EXIT_FAILURE, errno, "%s", infile);
217 bp_out = buf;
218 to_read = n_read;
219 for (;;)
221 if (to_read < to_write)
223 if (to_read) /* do not write 0 bytes! */
225 cwrite (new_file_flag, bp_out, to_read);
226 to_write -= to_read;
227 new_file_flag = 0;
229 break;
231 else
233 cwrite (new_file_flag, bp_out, to_write);
234 bp_out += to_write;
235 to_read -= to_write;
236 new_file_flag = 1;
237 to_write = nchars;
241 while (n_read == bufsize);
244 /* Split into pieces of exactly NLINES lines.
245 Use buffer BUF, whose size is BUFSIZE. */
247 static void
248 lines_split (int nlines, char *buf, int bufsize)
250 int n_read;
251 char *bp, *bp_out, *eob;
252 int new_file_flag = 1;
253 int n = 0;
257 n_read = stdread (buf, bufsize);
258 if (n_read < 0)
259 error (EXIT_FAILURE, errno, "%s", infile);
260 bp = bp_out = buf;
261 eob = bp + n_read;
262 *eob = '\n';
263 for (;;)
265 while (*bp++ != '\n')
266 ; /* this semicolon takes most of the time */
267 if (bp > eob)
269 if (eob != bp_out) /* do not write 0 bytes! */
271 cwrite (new_file_flag, bp_out, eob - bp_out);
272 new_file_flag = 0;
274 break;
276 else
277 if (++n >= nlines)
279 cwrite (new_file_flag, bp_out, bp - bp_out);
280 bp_out = bp;
281 new_file_flag = 1;
282 n = 0;
286 while (n_read == bufsize);
289 /* Split into pieces that are as large as possible while still not more
290 than NCHARS bytes, and are split on line boundaries except
291 where lines longer than NCHARS bytes occur. */
293 static void
294 line_bytes_split (int nchars)
296 int n_read;
297 char *bp;
298 int eof = 0;
299 int n_buffered = 0;
300 char *buf = (char *) xmalloc (nchars);
304 /* Fill up the full buffer size from the input file. */
306 n_read = stdread (buf + n_buffered, nchars - n_buffered);
307 if (n_read < 0)
308 error (EXIT_FAILURE, errno, "%s", infile);
310 n_buffered += n_read;
311 if (n_buffered != nchars)
312 eof = 1;
314 /* Find where to end this chunk. */
315 bp = buf + n_buffered;
316 if (n_buffered == nchars)
318 while (bp > buf && bp[-1] != '\n')
319 bp--;
322 /* If chunk has no newlines, use all the chunk. */
323 if (bp == buf)
324 bp = buf + n_buffered;
326 /* Output the chars as one output file. */
327 cwrite (1, buf, bp - buf);
329 /* Discard the chars we just output; move rest of chunk
330 down to be the start of the next chunk. Source and
331 destination probably overlap. */
332 n_buffered -= bp - buf;
333 if (n_buffered > 0)
334 memmove (buf, bp, n_buffered);
336 while (!eof);
337 free (buf);
341 main (int argc, char **argv)
343 struct stat stat_buf;
344 int num; /* numeric argument from command line */
345 enum
347 type_undef, type_bytes, type_byteslines, type_lines, type_digits
348 } split_type = type_undef;
349 int in_blk_size; /* optimal block size of input file device */
350 char *buf; /* file i/o buffer */
351 int accum = 0;
352 char *outbase;
353 int c;
354 int digits_optind = 0;
356 program_name = argv[0];
357 setlocale (LC_ALL, "");
358 bindtextdomain (PACKAGE, LOCALEDIR);
359 textdomain (PACKAGE);
361 /* Parse command line options. */
363 infile = "-";
364 outbase = "x";
366 while (1)
368 /* This is the argv-index of the option we will read next. */
369 int this_optind = optind ? optind : 1;
370 long int tmp_long;
372 c = getopt_long (argc, argv, "0123456789vb:l:C:", longopts, (int *) 0);
373 if (c == EOF)
374 break;
376 switch (c)
378 case 0:
379 break;
381 case 'b':
382 if (split_type != type_undef)
384 error (0, 0, _("cannot split in more than one way"));
385 usage (EXIT_FAILURE);
387 split_type = type_bytes;
388 if (xstrtol (optarg, NULL, 10, &tmp_long, "bkm") != LONGINT_OK
389 || tmp_long < 0 || tmp_long > INT_MAX)
391 error (0, 0, _("%s: invalid number of bytes"), optarg);
392 usage (EXIT_FAILURE);
394 accum = (int) tmp_long;
395 break;
397 case 'l':
398 if (split_type != type_undef)
400 error (0, 0, _("cannot split in more than one way"));
401 usage (EXIT_FAILURE);
403 split_type = type_lines;
404 if (xstrtol (optarg, NULL, 10, &tmp_long, "") != LONGINT_OK
405 || tmp_long < 0 || tmp_long > INT_MAX)
407 error (0, 0, _("%s: invalid number of lines"), optarg);
408 usage (EXIT_FAILURE);
410 accum = (int) tmp_long;
411 break;
413 case 'C':
414 if (split_type != type_undef)
416 error (0, 0, _("cannot split in more than one way"));
417 usage (EXIT_FAILURE);
420 split_type = type_byteslines;
421 if (xstrtol (optarg, NULL, 10, &tmp_long, "bkm") != LONGINT_OK
422 || tmp_long < 0 || tmp_long > INT_MAX)
424 error (0, 0, _("%s: invalid number of bytes"), optarg);
425 usage (EXIT_FAILURE);
427 accum = (int) tmp_long;
428 break;
430 case '0':
431 case '1':
432 case '2':
433 case '3':
434 case '4':
435 case '5':
436 case '6':
437 case '7':
438 case '8':
439 case '9':
440 if (split_type != type_undef && split_type != type_digits)
442 error (0, 0, _("cannot split in more than one way"));
443 usage (EXIT_FAILURE);
445 if (digits_optind != 0 && digits_optind != this_optind)
446 accum = 0; /* More than one number given; ignore other. */
447 digits_optind = this_optind;
448 split_type = type_digits;
449 accum = accum * 10 + c - '0';
450 break;
452 case 2:
453 verbose = 1;
454 break;
456 default:
457 usage (EXIT_FAILURE);
461 if (show_version)
463 printf ("split (%s) %s\n", GNU_PACKAGE, VERSION);
464 exit (EXIT_SUCCESS);
467 if (show_help)
468 usage (0);
470 /* Handle default case. */
471 if (split_type == type_undef)
473 split_type = type_lines;
474 accum = 1000;
477 if (accum < 1)
479 error (0, 0, _("invalid number"));
480 usage (EXIT_FAILURE);
482 num = accum;
484 /* Get out the filename arguments. */
486 if (optind < argc)
487 infile = argv[optind++];
489 if (optind < argc)
490 outbase = argv[optind++];
492 if (optind < argc)
494 error (0, 0, _("too many arguments"));
495 usage (EXIT_FAILURE);
498 /* Open the input file. */
499 if (!strcmp (infile, "-"))
500 input_desc = 0;
501 else
503 input_desc = open (infile, O_RDONLY);
504 if (input_desc < 0)
505 error (EXIT_FAILURE, errno, "%s", infile);
508 /* No output file is open now. */
509 output_desc = -1;
511 /* Copy the output file prefix so we can add suffixes to it.
512 26**29 is certainly enough output files! */
514 outfile = xmalloc (strlen (outbase) + 30);
515 strcpy (outfile, outbase);
516 outfile_mid = outfile + strlen (outfile);
517 outfile_end = outfile_mid + 2;
518 memset (outfile_mid, 0, 30);
519 outfile_mid[0] = 'a';
520 outfile_mid[1] = 'a' - 1; /* first call to next_file_name makes it an 'a' */
522 /* Get the optimal block size of input device and make a buffer. */
524 if (fstat (input_desc, &stat_buf) < 0)
525 error (EXIT_FAILURE, errno, "%s", infile);
526 in_blk_size = ST_BLKSIZE (stat_buf);
528 buf = xmalloc (in_blk_size + 1);
530 switch (split_type)
532 case type_digits:
533 case type_lines:
534 lines_split (num, buf, in_blk_size);
535 break;
537 case type_bytes:
538 bytes_split (num, buf, in_blk_size);
539 break;
541 case type_byteslines:
542 line_bytes_split (num);
543 break;
545 default:
546 abort ();
549 if (close (input_desc) < 0)
550 error (EXIT_FAILURE, errno, "%s", infile);
551 if (output_desc >= 0 && close (output_desc) < 0)
552 error (EXIT_FAILURE, errno, "%s", outfile);
554 exit (EXIT_SUCCESS);