update Copyright years for 1996
[coreutils.git] / src / split.c
blobf1e4dc350f6446412868e203b303332bc1f91de1
1 /* split.c -- split a file into pieces.
2 Copyright (C) 88, 91, 95, 1996 Free Software Foundation, Inc.
4 This program is free software; you can redistribute it and/or modify
5 it under the terms of the GNU General Public License as published by
6 the Free Software Foundation; either version 2, or (at your option)
7 any later version.
9 This program is distributed in the hope that it will be useful,
10 but WITHOUT ANY WARRANTY; without even the implied warranty of
11 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 GNU General Public License for more details.
14 You should have received a copy of the GNU General Public License
15 along with this program; if not, write to the Free Software
16 Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. */
18 /* By tege@sics.se, with rms.
20 To do:
21 * Implement -t CHAR or -t REGEX to specify break characters other
22 than newline. */
24 #include <config.h>
26 #include <stdio.h>
27 #include <getopt.h>
28 #include <sys/types.h>
30 #if HAVE_LIMITS_H
31 # include <limits.h>
32 #endif
34 #ifndef UINT_MAX
35 # define UINT_MAX ((unsigned int) ~(unsigned int) 0)
36 #endif
38 #ifndef INT_MAX
39 # define INT_MAX ((int) (UINT_MAX >> 1))
40 #endif
42 #include "system.h"
43 #include "error.h"
44 #include "xstrtol.h"
46 char *xmalloc ();
47 int full_write ();
48 int safe_read ();
/* Name under which this program was invoked; printed in front of
   diagnostics and in the usage message.  */
char *program_name;

/* Name of the input file as given by the user.  May be "-".  */
static char *infile;

/* File descriptor from which the input is read.  */
static int input_desc;

/* File descriptor of the output file currently being written.  */
static int output_desc;

/* Buffer holding the name of the current output file: the user's
   prefix followed by the generated suffix.  */
static char *outfile;

/* Points just past the prefix inside OUTFILE.
   The generated suffix is stored starting here.  */
static char *outfile_mid;

/* Points just past the last character of the name in OUTFILE.  */
static char *outfile_end;

/* Nonzero means: display usage information and exit.  */
static int show_help;

/* Nonzero means: print the version on standard output, then exit.  */
static int show_version;

/* Nonzero means: announce each output file on standard error just
   before it is opened.  */
static int verbose;
82 static struct option const longopts[] =
84 {"bytes", required_argument, NULL, 'b'},
85 {"lines", required_argument, NULL, 'l'},
86 {"line-bytes", required_argument, NULL, 'C'},
87 {"verbose", no_argument, NULL, 2},
88 {"help", no_argument, &show_help, 1},
89 {"version", no_argument, &show_version, 1},
90 {NULL, 0, NULL, 0}
93 static void
94 usage (int status, const char *reason)
96 if (reason != NULL)
97 fprintf (stderr, "%s: %s\n", program_name, reason);
99 if (status != 0)
100 fprintf (stderr, _("Try `%s --help' for more information.\n"),
101 program_name);
102 else
104 printf (_("\
105 Usage: %s [OPTION] [INPUT [PREFIX]]\n\
107 program_name);
108 printf (_("\
109 Output fixed-size pieces of INPUT to PREFIXaa, PREFIXab, ...; default\n\
110 PREFIX is `x'. With no INPUT, or when INPUT is -, read standard input.\n\
112 -C, --line-bytes=SIZE put at most SIZE bytes of lines per output file\n\
113 -b, --bytes=SIZE put SIZE bytes per output file\n\
114 -l, --lines=NUMBER put NUMBER lines per output file\n\
115 --verbose print a diagnostic to standard error just\n\
116 before each output file is opened\n\
117 -NUMBER same as -l NUMBER\n\
118 --help display this help and exit\n\
119 --version output version information and exit\n\
121 SIZE may have a multiplier suffix: b for 512, k for 1K, m for 1 Meg.\n\
122 "));
124 exit (status == 0 ? EXIT_SUCCESS : EXIT_FAILURE);
127 /* Compute the next sequential output file name suffix and store it
128 into the string `outfile' at the position pointed to by `outfile_mid'. */
130 static void
131 next_file_name (void)
133 int x;
134 char *ne;
135 unsigned int i;
137 static int first_call = 1;
139 /* Status for outfile name generation. */
140 static unsigned outfile_count = 0;
141 static unsigned outfile_name_limit = 25 * 26;
142 static unsigned outfile_name_generation = 1;
144 if (!first_call)
145 outfile_count++;
146 first_call = 0;
147 if (outfile_count < outfile_name_limit)
149 for (ne = outfile_end - 1; ; ne--)
151 x = *ne;
152 if (x != 'z')
153 break;
154 *ne = 'a';
156 *ne = x + 1;
157 return;
160 outfile_count = 0;
161 outfile_name_limit *= 26;
162 outfile_name_generation++;
163 *outfile_mid++ = 'z';
164 for (i = 0; i <= outfile_name_generation; i++)
165 outfile_mid[i] = 'a';
166 outfile_end += 2;
169 /* Write BYTES bytes at BP to an output file.
170 If NEW_FILE_FLAG is nonzero, open the next output file.
171 Otherwise add to the same output file already in use. */
173 static void
174 cwrite (int new_file_flag, const char *bp, int bytes)
176 if (new_file_flag)
178 if (output_desc >= 0 && close (output_desc) < 0)
179 error (EXIT_FAILURE, errno, "%s", outfile);
181 next_file_name ();
182 if (verbose)
183 fprintf (stderr, _("creating file `%s'\n"), outfile);
184 output_desc = open (outfile, O_WRONLY | O_CREAT | O_TRUNC, 0666);
185 if (output_desc < 0)
186 error (EXIT_FAILURE, errno, "%s", outfile);
188 if (full_write (output_desc, bp, bytes) < 0)
189 error (EXIT_FAILURE, errno, "%s", outfile);
192 /* Read NCHARS bytes from the input file into BUF.
193 Return the number of bytes successfully read.
194 If this is less than NCHARS, do not call `stdread' again. */
196 static int
197 stdread (char *buf, int nchars)
199 int n_read;
200 int to_be_read = nchars;
202 while (to_be_read)
204 n_read = safe_read (input_desc, buf, to_be_read);
205 if (n_read < 0)
206 return -1;
207 if (n_read == 0)
208 break;
209 to_be_read -= n_read;
210 buf += n_read;
212 return nchars - to_be_read;
215 /* Split into pieces of exactly NCHARS bytes.
216 Use buffer BUF, whose size is BUFSIZE. */
218 static void
219 bytes_split (int nchars, char *buf, int bufsize)
221 int n_read;
222 int new_file_flag = 1;
223 int to_read;
224 int to_write = nchars;
225 char *bp_out;
229 n_read = stdread (buf, bufsize);
230 if (n_read < 0)
231 error (EXIT_FAILURE, errno, "%s", infile);
232 bp_out = buf;
233 to_read = n_read;
234 for (;;)
236 if (to_read < to_write)
238 if (to_read) /* do not write 0 bytes! */
240 cwrite (new_file_flag, bp_out, to_read);
241 to_write -= to_read;
242 new_file_flag = 0;
244 break;
246 else
248 cwrite (new_file_flag, bp_out, to_write);
249 bp_out += to_write;
250 to_read -= to_write;
251 new_file_flag = 1;
252 to_write = nchars;
256 while (n_read == bufsize);
259 /* Split into pieces of exactly NLINES lines.
260 Use buffer BUF, whose size is BUFSIZE. */
262 static void
263 lines_split (int nlines, char *buf, int bufsize)
265 int n_read;
266 char *bp, *bp_out, *eob;
267 int new_file_flag = 1;
268 int n = 0;
272 n_read = stdread (buf, bufsize);
273 if (n_read < 0)
274 error (EXIT_FAILURE, errno, "%s", infile);
275 bp = bp_out = buf;
276 eob = bp + n_read;
277 *eob = '\n';
278 for (;;)
280 while (*bp++ != '\n')
281 ; /* this semicolon takes most of the time */
282 if (bp > eob)
284 if (eob != bp_out) /* do not write 0 bytes! */
286 cwrite (new_file_flag, bp_out, eob - bp_out);
287 new_file_flag = 0;
289 break;
291 else
292 if (++n >= nlines)
294 cwrite (new_file_flag, bp_out, bp - bp_out);
295 bp_out = bp;
296 new_file_flag = 1;
297 n = 0;
301 while (n_read == bufsize);
304 /* Split into pieces that are as large as possible while still not more
305 than NCHARS bytes, and are split on line boundaries except
306 where lines longer than NCHARS bytes occur. */
308 static void
309 line_bytes_split (int nchars)
311 int n_read;
312 char *bp;
313 int eof = 0;
314 int n_buffered = 0;
315 char *buf = (char *) xmalloc (nchars);
319 /* Fill up the full buffer size from the input file. */
321 n_read = stdread (buf + n_buffered, nchars - n_buffered);
322 if (n_read < 0)
323 error (EXIT_FAILURE, errno, "%s", infile);
325 n_buffered += n_read;
326 if (n_buffered != nchars)
327 eof = 1;
329 /* Find where to end this chunk. */
330 bp = buf + n_buffered;
331 if (n_buffered == nchars)
333 while (bp > buf && bp[-1] != '\n')
334 bp--;
337 /* If chunk has no newlines, use all the chunk. */
338 if (bp == buf)
339 bp = buf + n_buffered;
341 /* Output the chars as one output file. */
342 cwrite (1, buf, bp - buf);
344 /* Discard the chars we just output; move rest of chunk
345 down to be the start of the next chunk. Source and
346 destination probably overlap. */
347 n_buffered -= bp - buf;
348 if (n_buffered > 0)
349 memmove (buf, bp, n_buffered);
351 while (!eof);
352 free (buf);
356 main (int argc, char **argv)
358 struct stat stat_buf;
359 int num; /* numeric argument from command line */
360 enum
362 type_undef, type_bytes, type_byteslines, type_lines, type_digits
363 } split_type = type_undef;
364 int in_blk_size; /* optimal block size of input file device */
365 char *buf; /* file i/o buffer */
366 int accum = 0;
367 char *outbase;
368 int c;
369 int digits_optind = 0;
371 program_name = argv[0];
372 setlocale (LC_ALL, "");
373 bindtextdomain (PACKAGE, LOCALEDIR);
374 textdomain (PACKAGE);
376 /* Parse command line options. */
378 infile = "-";
379 outbase = "x";
381 while (1)
383 /* This is the argv-index of the option we will read next. */
384 int this_optind = optind ? optind : 1;
385 long int tmp_long;
387 c = getopt_long (argc, argv, "0123456789vb:l:C:", longopts, (int *) 0);
388 if (c == EOF)
389 break;
391 switch (c)
393 case 0:
394 break;
396 case 'b':
397 if (split_type != type_undef)
398 usage (2, _("cannot split in more than one way"));
399 split_type = type_bytes;
400 if (xstrtol (optarg, NULL, 10, &tmp_long, "bkm") != LONGINT_OK
401 || tmp_long < 0 || tmp_long > INT_MAX)
402 usage (2, _("invalid number of bytes"));
403 accum = (int) tmp_long;
404 break;
406 case 'l':
407 if (split_type != type_undef)
408 usage (2, _("cannot split in more than one way"));
409 split_type = type_lines;
410 if (xstrtol (optarg, NULL, 10, &tmp_long, "") != LONGINT_OK
411 || tmp_long < 0 || tmp_long > INT_MAX)
412 usage (2, _("invalid number of lines"));
413 accum = (int) tmp_long;
414 break;
416 case 'C':
417 if (split_type != type_undef)
418 usage (2, _("cannot split in more than one way"));
419 split_type = type_byteslines;
420 if (xstrtol (optarg, NULL, 10, &tmp_long, "bkm") != LONGINT_OK
421 || tmp_long < 0 || tmp_long > INT_MAX)
422 usage (2, _("invalid number of bytes"));
423 accum = (int) tmp_long;
424 break;
426 case '0':
427 case '1':
428 case '2':
429 case '3':
430 case '4':
431 case '5':
432 case '6':
433 case '7':
434 case '8':
435 case '9':
436 if (split_type != type_undef && split_type != type_digits)
437 usage (2, _("cannot split in more than one way"));
438 if (digits_optind != 0 && digits_optind != this_optind)
439 accum = 0; /* More than one number given; ignore other. */
440 digits_optind = this_optind;
441 split_type = type_digits;
442 accum = accum * 10 + c - '0';
443 break;
445 case 2:
446 verbose = 1;
447 break;
449 default:
450 usage (2, (char *)0);
454 if (show_version)
456 printf ("split - %s\n", PACKAGE_VERSION);
457 exit (EXIT_SUCCESS);
460 if (show_help)
461 usage (0, (char *)0);
463 /* Handle default case. */
464 if (split_type == type_undef)
466 split_type = type_lines;
467 accum = 1000;
470 if (accum < 1)
471 usage (2, _("invalid number"));
472 num = accum;
474 /* Get out the filename arguments. */
476 if (optind < argc)
477 infile = argv[optind++];
479 if (optind < argc)
480 outbase = argv[optind++];
482 if (optind < argc)
483 usage (2, _("too many arguments"));
485 /* Open the input file. */
486 if (!strcmp (infile, "-"))
487 input_desc = 0;
488 else
490 input_desc = open (infile, O_RDONLY);
491 if (input_desc < 0)
492 error (EXIT_FAILURE, errno, "%s", infile);
495 /* No output file is open now. */
496 output_desc = -1;
498 /* Copy the output file prefix so we can add suffixes to it.
499 26**29 is certainly enough output files! */
501 outfile = xmalloc (strlen (outbase) + 30);
502 strcpy (outfile, outbase);
503 outfile_mid = outfile + strlen (outfile);
504 outfile_end = outfile_mid + 2;
505 memset (outfile_mid, 0, 30);
506 outfile_mid[0] = 'a';
507 outfile_mid[1] = 'a' - 1; /* first call to next_file_name makes it an 'a' */
509 /* Get the optimal block size of input device and make a buffer. */
511 if (fstat (input_desc, &stat_buf) < 0)
512 error (EXIT_FAILURE, errno, "%s", infile);
513 in_blk_size = ST_BLKSIZE (stat_buf);
515 buf = xmalloc (in_blk_size + 1);
517 switch (split_type)
519 case type_digits:
520 case type_lines:
521 lines_split (num, buf, in_blk_size);
522 break;
524 case type_bytes:
525 bytes_split (num, buf, in_blk_size);
526 break;
528 case type_byteslines:
529 line_bytes_split (num);
530 break;
532 default:
533 abort ();
536 if (close (input_desc) < 0)
537 error (EXIT_FAILURE, errno, "%s", infile);
538 if (output_desc >= 0 && close (output_desc) < 0)
539 error (EXIT_FAILURE, errno, "%s", outfile);
541 exit (EXIT_SUCCESS);