/* Snapshot of src/split.c from coreutils.git, taken at the commit titled
   "split: honor $TMPDIR for temp files"
   (blob cf776364cd2e450dcc72eb24b94a6267da68074b).  */
1 /* split.c -- split a file into pieces.
2 Copyright (C) 1988-2023 Free Software Foundation, Inc.
4 This program is free software: you can redistribute it and/or modify
5 it under the terms of the GNU General Public License as published by
6 the Free Software Foundation, either version 3 of the License, or
7 (at your option) any later version.
9 This program is distributed in the hope that it will be useful,
10 but WITHOUT ANY WARRANTY; without even the implied warranty of
11 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 GNU General Public License for more details.
14 You should have received a copy of the GNU General Public License
15 along with this program. If not, see <https://www.gnu.org/licenses/>. */
17 /* By tege@sics.se, with rms.
19 TODO:
20 * support -p REGEX as in BSD's split.
21 * support --suppress-matched as in csplit. */
22 #include <config.h>
24 #include <stdckdint.h>
25 #include <stdio.h>
26 #include <getopt.h>
27 #include <signal.h>
28 #include <sys/types.h>
29 #include <sys/wait.h>
31 #include "system.h"
32 #include "alignalloc.h"
33 #include "assure.h"
34 #include "fadvise.h"
35 #include "fd-reopen.h"
36 #include "fcntl--.h"
37 #include "full-write.h"
38 #include "idx.h"
39 #include "ioblksize.h"
40 #include "quote.h"
41 #include "sig2str.h"
42 #include "sys-limits.h"
43 #include "temp-stream.h"
44 #include "xbinary-io.h"
45 #include "xdectoint.h"
46 #include "xstrtol.h"
/* Canonical name of this program; it deliberately carries no
   installation prefix such as 'g'.  */
#define PROGRAM_NAME "split"

/* The program's authors, as a comma-separated proper_name list.  */
#define AUTHORS \
  proper_name ("Torbjorn Granlund"), \
  proper_name ("Richard M. Stallman")
55 /* Shell command to filter through, instead of creating files. */
56 static char const *filter_command;
58 /* Process ID of the filter. */
59 static pid_t filter_pid;
61 /* Array of open pipes. */
62 static int *open_pipes;
63 static idx_t open_pipes_alloc;
64 static int n_open_pipes;
66 /* Whether SIGPIPE has the default action, when --filter is used. */
67 static bool default_SIGPIPE;
69 /* Base name of output files. */
70 static char const *outbase;
72 /* Name of output files. */
73 static char *outfile;
75 /* Pointer to the end of the prefix in OUTFILE.
76 Suffixes are inserted here. */
77 static char *outfile_mid;
79 /* Generate new suffix when suffixes are exhausted. */
80 static bool suffix_auto = true;
82 /* Length of OUTFILE's suffix. */
83 static idx_t suffix_length;
85 /* Alphabet of characters to use in suffix. */
86 static char const *suffix_alphabet = "abcdefghijklmnopqrstuvwxyz";
88 /* Numerical suffix start value. */
89 static char const *numeric_suffix_start;
91 /* Additional suffix to append to output file names. */
92 static char const *additional_suffix;
94 /* Name of input file. May be "-". */
95 static char *infile;
97 /* stat buf for input file. */
98 static struct stat in_stat_buf;
100 /* Descriptor on which output file is open. */
101 static int output_desc = -1;
103 /* If true, print a diagnostic on standard error just before each
104 output file is opened. */
105 static bool verbose;
107 /* If true, don't generate zero length output files. */
108 static bool elide_empty_files;
110 /* If true, in round robin mode, immediately copy
111 input to output, which is much slower, so disabled by default. */
112 static bool unbuffered;
114 /* The character marking end of line. Defaults to \n below. */
115 static int eolchar = -1;
/* The ways in which the input can be divided into pieces.  */
enum Split_type
{
  type_undef,
  type_bytes,
  type_byteslines,
  type_lines,
  type_digits,
  type_chunk_bytes,
  type_chunk_lines,
  type_rr
};
/* Pseudo short-option codes for long options that have no single-letter
   equivalent.  They start at CHAR_MAX + 1 so that they can never collide
   with a real option character.  */
enum
{
  VERBOSE_OPTION = CHAR_MAX + 1,
  FILTER_OPTION,
  IO_BLKSIZE_OPTION,
  ADDITIONAL_SUFFIX_OPTION
};
134 static struct option const longopts[] =
136 {"bytes", required_argument, nullptr, 'b'},
137 {"lines", required_argument, nullptr, 'l'},
138 {"line-bytes", required_argument, nullptr, 'C'},
139 {"number", required_argument, nullptr, 'n'},
140 {"elide-empty-files", no_argument, nullptr, 'e'},
141 {"unbuffered", no_argument, nullptr, 'u'},
142 {"suffix-length", required_argument, nullptr, 'a'},
143 {"additional-suffix", required_argument, nullptr,
144 ADDITIONAL_SUFFIX_OPTION},
145 {"numeric-suffixes", optional_argument, nullptr, 'd'},
146 {"hex-suffixes", optional_argument, nullptr, 'x'},
147 {"filter", required_argument, nullptr, FILTER_OPTION},
148 {"verbose", no_argument, nullptr, VERBOSE_OPTION},
149 {"separator", required_argument, nullptr, 't'},
150 {"-io-blksize", required_argument, nullptr,
151 IO_BLKSIZE_OPTION}, /* do not document */
152 {GETOPT_HELP_OPTION_DECL},
153 {GETOPT_VERSION_OPTION_DECL},
154 {nullptr, 0, nullptr, 0}
157 /* Return true if the errno value, ERR, is ignorable. */
158 static inline bool
159 ignorable (int err)
161 return filter_command && err == EPIPE;
164 static void
165 set_suffix_length (intmax_t n_units, enum Split_type split_type)
167 #define DEFAULT_SUFFIX_LENGTH 2
169 int suffix_length_needed = 0;
171 /* The suffix auto length feature is incompatible with
172 a user specified start value as the generated suffixes
173 are not all consecutive. */
174 if (numeric_suffix_start)
175 suffix_auto = false;
177 /* Auto-calculate the suffix length if the number of files is given. */
178 if (split_type == type_chunk_bytes || split_type == type_chunk_lines
179 || split_type == type_rr)
181 intmax_t n_units_end = n_units - 1;
182 if (numeric_suffix_start)
184 intmax_t n_start;
185 strtol_error e = xstrtoimax (numeric_suffix_start, nullptr, 10,
186 &n_start, "");
187 if (e == LONGINT_OK && n_start < n_units)
189 /* Restrict auto adjustment so we don't keep
190 incrementing a suffix size arbitrarily,
191 as that would break sort order for files
192 generated from multiple split runs. */
193 if (ckd_add (&n_units_end, n_units_end, n_start))
194 n_units_end = INTMAX_MAX;
198 idx_t alphabet_len = strlen (suffix_alphabet);
200 suffix_length_needed++;
201 while (n_units_end /= alphabet_len);
203 suffix_auto = false;
206 if (suffix_length) /* set by user */
208 if (suffix_length < suffix_length_needed)
209 error (EXIT_FAILURE, 0,
210 _("the suffix length needs to be at least %d"),
211 suffix_length_needed);
212 suffix_auto = false;
213 return;
215 else
216 suffix_length = MAX (DEFAULT_SUFFIX_LENGTH, suffix_length_needed);
/* Print usage information and exit with STATUS; this function does not
   return.  When STATUS is not EXIT_SUCCESS only the emit_try_help hint is
   produced.  Otherwise the synopsis, the option summary (which embeds
   DEFAULT_SUFFIX_LENGTH) and the description of the CHUNKS forms are
   written to stdout.  */
219 void
220 usage (int status)
222 if (status != EXIT_SUCCESS)
223 emit_try_help ();
224 else
226 printf (_("\
227 Usage: %s [OPTION]... [FILE [PREFIX]]\n\
229 program_name);
230 fputs (_("\
231 Output pieces of FILE to PREFIXaa, PREFIXab, ...;\n\
232 default size is 1000 lines, and default PREFIX is 'x'.\n\
233 "), stdout);
235 emit_stdin_note ();
236 emit_mandatory_arg_note ();
238 fprintf (stdout, _("\
239 -a, --suffix-length=N generate suffixes of length N (default %d)\n\
240 --additional-suffix=SUFFIX append an additional SUFFIX to file names\n\
241 -b, --bytes=SIZE put SIZE bytes per output file\n\
242 -C, --line-bytes=SIZE put at most SIZE bytes of records per output file\n\
243 -d use numeric suffixes starting at 0, not alphabetic\n\
244 --numeric-suffixes[=FROM] same as -d, but allow setting the start value\
246 -x use hex suffixes starting at 0, not alphabetic\n\
247 --hex-suffixes[=FROM] same as -x, but allow setting the start value\n\
248 -e, --elide-empty-files do not generate empty output files with '-n'\n\
249 --filter=COMMAND write to shell COMMAND; file name is $FILE\n\
250 -l, --lines=NUMBER put NUMBER lines/records per output file\n\
251 -n, --number=CHUNKS generate CHUNKS output files; see explanation below\n\
252 -t, --separator=SEP use SEP instead of newline as the record separator;\n\
253 '\\0' (zero) specifies the NUL character\n\
254 -u, --unbuffered immediately copy input to output with '-n r/...'\n\
255 "), DEFAULT_SUFFIX_LENGTH);
256 fputs (_("\
257 --verbose print a diagnostic just before each\n\
258 output file is opened\n\
259 "), stdout);
260 fputs (HELP_OPTION_DESCRIPTION, stdout);
261 fputs (VERSION_OPTION_DESCRIPTION, stdout);
262 emit_size_note ();
263 fputs (_("\n\
264 CHUNKS may be:\n\
265 N split into N files based on size of input\n\
266 K/N output Kth of N to stdout\n\
267 l/N split into N files without splitting lines/records\n\
268 l/K/N output Kth of N to stdout without splitting lines/records\n\
269 r/N like 'l' but use round robin distribution\n\
270 r/K/N likewise but only output Kth of N to stdout\n\
271 "), stdout);
272 emit_ancillary_info (PROGRAM_NAME);
274 exit (status);
277 /* Copy the data in FD to a temporary file, then make that file FD.
278 Use BUF, of size BUFSIZE, to copy. Return the number of
279 bytes copied, or -1 (setting errno) on error. */
280 static off_t
281 copy_to_tmpfile (int fd, char *buf, idx_t bufsize)
283 FILE *tmp;
284 if (!temp_stream (&tmp, nullptr))
285 return -1;
286 off_t copied = 0;
287 off_t r;
289 while (0 < (r = read (fd, buf, bufsize)))
291 if (fwrite (buf, 1, r, tmp) != r)
292 return -1;
293 if (ckd_add (&copied, copied, r))
295 errno = EOVERFLOW;
296 return -1;
300 if (r < 0)
301 return r;
302 r = dup2 (fileno (tmp), fd);
303 if (r < 0)
304 return r;
305 if (fclose (tmp) < 0)
306 return -1;
307 return copied;
310 /* Return the number of bytes that can be read from FD with status ST.
311 Store up to the first BUFSIZE bytes of the file's data into BUF,
312 and advance the file position by the number of bytes read. On
313 input error, set errno and return -1. */
315 static off_t
316 input_file_size (int fd, struct stat const *st, char *buf, idx_t bufsize)
318 off_t size = 0;
321 ssize_t n_read = read (fd, buf + size, bufsize - size);
322 if (n_read <= 0)
323 return n_read < 0 ? n_read : size;
324 size += n_read;
326 while (size < bufsize);
328 off_t cur, end;
329 if ((usable_st_size (st) && st->st_size < size)
330 || (cur = lseek (fd, 0, SEEK_CUR)) < 0
331 || cur < size /* E.g., /dev/zero on GNU/Linux. */
332 || (end = lseek (fd, 0, SEEK_END)) < 0)
334 char *tmpbuf = xmalloc (bufsize);
335 end = copy_to_tmpfile (fd, tmpbuf, bufsize);
336 free (tmpbuf);
337 if (end < 0)
338 return end;
339 cur = 0;
342 if (end == OFF_T_MAX /* E.g., /dev/zero on GNU/Hurd. */
343 || (cur < end && ckd_add (&size, size, end - cur)))
345 errno = EOVERFLOW;
346 return -1;
349 if (cur < end)
351 off_t r = lseek (fd, cur, SEEK_SET);
352 if (r < 0)
353 return r;
356 return size;
359 /* Compute the next sequential output file name and store it into the
360 string 'outfile'. */
362 static void
363 next_file_name (void)
365 /* Index in suffix_alphabet of each character in the suffix. */
366 static idx_t *sufindex;
367 static idx_t outbase_length;
368 static idx_t outfile_length;
369 static idx_t addsuf_length;
371 if (! outfile)
373 bool overflow, widen;
375 new_name:
376 widen = !! outfile_length;
378 if (! widen)
380 /* Allocate and initialize the first file name. */
382 outbase_length = strlen (outbase);
383 addsuf_length = additional_suffix ? strlen (additional_suffix) : 0;
384 overflow = ckd_add (&outfile_length, outbase_length + addsuf_length,
385 suffix_length);
387 else
389 /* Reallocate and initialize a new wider file name.
390 We do this by subsuming the unchanging part of
391 the generated suffix into the prefix (base), and
392 reinitializing the now one longer suffix. */
394 overflow = ckd_add (&outfile_length, outfile_length, 2);
395 suffix_length++;
398 idx_t outfile_size;
399 overflow |= ckd_add (&outfile_size, outfile_length, 1);
400 if (overflow)
401 xalloc_die ();
402 outfile = xirealloc (outfile, outfile_size);
404 if (! widen)
405 memcpy (outfile, outbase, outbase_length);
406 else
408 /* Append the last alphabet character to the file name prefix. */
409 outfile[outbase_length] = suffix_alphabet[sufindex[0]];
410 outbase_length++;
413 outfile_mid = outfile + outbase_length;
414 memset (outfile_mid, suffix_alphabet[0], suffix_length);
415 if (additional_suffix)
416 memcpy (outfile_mid + suffix_length, additional_suffix, addsuf_length);
417 outfile[outfile_length] = 0;
419 free (sufindex);
420 sufindex = xicalloc (suffix_length, sizeof *sufindex);
422 if (numeric_suffix_start)
424 affirm (! widen);
426 /* Update the output file name. */
427 idx_t i = strlen (numeric_suffix_start);
428 memcpy (outfile_mid + suffix_length - i, numeric_suffix_start, i);
430 /* Update the suffix index. */
431 idx_t *sufindex_end = sufindex + suffix_length;
432 while (i-- != 0)
433 *--sufindex_end = numeric_suffix_start[i] - '0';
436 #if ! _POSIX_NO_TRUNC && HAVE_PATHCONF && defined _PC_NAME_MAX
437 /* POSIX requires that if the output file name is too long for
438 its directory, 'split' must fail without creating any files.
439 This must be checked for explicitly on operating systems that
440 silently truncate file names. */
442 char *dir = dir_name (outfile);
443 long name_max = pathconf (dir, _PC_NAME_MAX);
444 if (0 <= name_max && name_max < base_len (last_component (outfile)))
445 error (EXIT_FAILURE, ENAMETOOLONG, "%s", quotef (outfile));
446 free (dir);
448 #endif
450 else
452 /* Increment the suffix in place, if possible. */
454 idx_t i = suffix_length;
455 while (i-- != 0)
457 sufindex[i]++;
458 if (suffix_auto && i == 0 && ! suffix_alphabet[sufindex[0] + 1])
459 goto new_name;
460 outfile_mid[i] = suffix_alphabet[sufindex[i]];
461 if (outfile_mid[i])
462 return;
463 sufindex[i] = 0;
464 outfile_mid[i] = suffix_alphabet[sufindex[i]];
466 error (EXIT_FAILURE, 0, _("output file suffixes exhausted"));
470 /* Create or truncate a file. */
472 static int
473 create (char const *name)
475 if (!filter_command)
477 if (verbose)
478 fprintf (stdout, _("creating file %s\n"), quoteaf (name));
480 int oflags = O_WRONLY | O_CREAT | O_BINARY;
481 int fd = open (name, oflags | O_EXCL, MODE_RW_UGO);
482 if (0 <= fd || errno != EEXIST)
483 return fd;
484 fd = open (name, oflags, MODE_RW_UGO);
485 if (fd < 0)
486 return fd;
487 struct stat out_stat_buf;
488 if (fstat (fd, &out_stat_buf) != 0)
489 error (EXIT_FAILURE, errno, _("failed to stat %s"), quoteaf (name));
490 if (SAME_INODE (in_stat_buf, out_stat_buf))
491 error (EXIT_FAILURE, 0, _("%s would overwrite input; aborting"),
492 quoteaf (name));
493 bool regularish
494 = S_ISREG (out_stat_buf.st_mode) || S_TYPEISSHM (&out_stat_buf);
495 if (! (regularish && out_stat_buf.st_size == 0)
496 && ftruncate (fd, 0) < 0 && regularish)
497 error (EXIT_FAILURE, errno, _("%s: error truncating"), quotef (name));
499 return fd;
501 else
503 int fd_pair[2];
504 pid_t child_pid;
505 char const *shell_prog = getenv ("SHELL");
506 if (shell_prog == nullptr)
507 shell_prog = "/bin/sh";
508 if (setenv ("FILE", name, 1) != 0)
509 error (EXIT_FAILURE, errno,
510 _("failed to set FILE environment variable"));
511 if (verbose)
512 fprintf (stdout, _("executing with FILE=%s\n"), quotef (name));
513 if (pipe (fd_pair) != 0)
514 error (EXIT_FAILURE, errno, _("failed to create pipe"));
515 child_pid = fork ();
516 if (child_pid == 0)
518 /* This is the child process. If an error occurs here, the
519 parent will eventually learn about it after doing a wait,
520 at which time it will emit its own error message. */
521 int j;
522 /* We have to close any pipes that were opened during an
523 earlier call, otherwise this process will be holding a
524 write-pipe that will prevent the earlier process from
525 reading an EOF on the corresponding read-pipe. */
526 for (j = 0; j < n_open_pipes; ++j)
527 if (close (open_pipes[j]) != 0)
528 error (EXIT_FAILURE, errno, _("closing prior pipe"));
529 if (close (fd_pair[1]))
530 error (EXIT_FAILURE, errno, _("closing output pipe"));
531 if (fd_pair[0] != STDIN_FILENO)
533 if (dup2 (fd_pair[0], STDIN_FILENO) != STDIN_FILENO)
534 error (EXIT_FAILURE, errno, _("moving input pipe"));
535 if (close (fd_pair[0]) != 0)
536 error (EXIT_FAILURE, errno, _("closing input pipe"));
538 if (default_SIGPIPE)
539 signal (SIGPIPE, SIG_DFL);
540 execl (shell_prog, last_component (shell_prog), "-c",
541 filter_command, (char *) nullptr);
542 error (EXIT_FAILURE, errno, _("failed to run command: \"%s -c %s\""),
543 shell_prog, filter_command);
545 if (child_pid < 0)
546 error (EXIT_FAILURE, errno, _("fork system call failed"));
547 if (close (fd_pair[0]) != 0)
548 error (EXIT_FAILURE, errno, _("failed to close input pipe"));
549 filter_pid = child_pid;
550 if (n_open_pipes == open_pipes_alloc)
551 open_pipes = xpalloc (open_pipes, &open_pipes_alloc, 1,
552 MIN (INT_MAX, IDX_MAX), sizeof *open_pipes);
553 open_pipes[n_open_pipes++] = fd_pair[1];
554 return fd_pair[1];
558 /* Close the output file, and do any associated cleanup.
559 If FP and FD are both specified, they refer to the same open file;
560 in this case FP is closed, but FD is still used in cleanup. */
561 static void
562 closeout (FILE *fp, int fd, pid_t pid, char const *name)
564 if (fp != nullptr && fclose (fp) != 0 && ! ignorable (errno))
565 error (EXIT_FAILURE, errno, "%s", quotef (name));
566 if (fd >= 0)
568 if (fp == nullptr && close (fd) < 0)
569 error (EXIT_FAILURE, errno, "%s", quotef (name));
570 int j;
571 for (j = 0; j < n_open_pipes; ++j)
573 if (open_pipes[j] == fd)
575 open_pipes[j] = open_pipes[--n_open_pipes];
576 break;
580 if (pid > 0)
582 int wstatus;
583 if (waitpid (pid, &wstatus, 0) < 0)
584 error (EXIT_FAILURE, errno, _("waiting for child process"));
585 else if (WIFSIGNALED (wstatus))
587 int sig = WTERMSIG (wstatus);
588 if (sig != SIGPIPE)
590 char signame[MAX (SIG2STR_MAX, INT_BUFSIZE_BOUND (int))];
591 if (sig2str (sig, signame) != 0)
592 sprintf (signame, "%d", sig);
593 error (sig + 128, 0,
594 _("with FILE=%s, signal %s from command: %s"),
595 quotef (name), signame, filter_command);
598 else if (WIFEXITED (wstatus))
600 int ex = WEXITSTATUS (wstatus);
601 if (ex != 0)
602 error (ex, 0, _("with FILE=%s, exit %d from command: %s"),
603 quotef (name), ex, filter_command);
605 else
607 /* shouldn't happen. */
608 error (EXIT_FAILURE, 0,
609 _("unknown status from command (0x%X)"), wstatus + 0u);
614 /* Write BYTES bytes at BP to an output file.
615 If NEW_FILE_FLAG is true, open the next output file.
616 Otherwise add to the same output file already in use.
617 Return true if successful. */
619 static bool
620 cwrite (bool new_file_flag, char const *bp, idx_t bytes)
622 if (new_file_flag)
624 if (!bp && bytes == 0 && elide_empty_files)
625 return true;
626 closeout (nullptr, output_desc, filter_pid, outfile);
627 next_file_name ();
628 output_desc = create (outfile);
629 if (output_desc < 0)
630 error (EXIT_FAILURE, errno, "%s", quotef (outfile));
633 if (full_write (output_desc, bp, bytes) == bytes)
634 return true;
635 else
637 if (! ignorable (errno))
638 error (EXIT_FAILURE, errno, "%s", quotef (outfile));
639 return false;
643 /* Split into pieces of exactly N_BYTES bytes.
644 However, the first REM_BYTES pieces should be 1 byte longer.
645 Use buffer BUF, whose size is BUFSIZE.
646 If INITIAL_READ is nonnegative,
647 BUF contains the first INITIAL_READ input bytes. */
/* MAX_FILES, when nonzero, caps the number of output files: once that
   many have been opened no further file is started, and the final loop
   below still creates any of the MAX_FILES files that were never opened.
   A MAX_FILES of zero places no limit on the number of files.  */
649 static void
650 bytes_split (intmax_t n_bytes, intmax_t rem_bytes,
651 char *buf, idx_t bufsize, ssize_t initial_read,
652 intmax_t max_files)
654 bool new_file_flag = true;
/* False once cwrite has reported an ignorable write failure for the
   current output; cleared again by a successful cwrite.  */
655 bool filter_ok = true;
/* Number of output files started so far.  */
656 intmax_t opened = 0;
/* Bytes still to be written to the current piece.  */
657 intmax_t to_write = n_bytes + (0 < rem_bytes);
658 bool eof = ! to_write;
660 while (! eof)
662 ssize_t n_read;
663 if (0 <= initial_read)
/* Consume the data the caller already placed in BUF; a short initial
   read is treated as end of input.  */
665 n_read = initial_read;
666 initial_read = -1;
667 eof = n_read < bufsize;
669 else
/* The current output is not accepting data: if the input is seekable,
   skip the rest of this piece rather than reading it, and start the
   next piece with a new file.  */
671 if (! filter_ok
672 && 0 <= lseek (STDIN_FILENO, to_write, SEEK_CUR))
674 to_write = n_bytes + (opened + 1 < rem_bytes);
675 new_file_flag = true;
678 n_read = read (STDIN_FILENO, buf, bufsize);
679 if (n_read < 0)
680 error (EXIT_FAILURE, errno, "%s", quotef (infile));
681 eof = n_read == 0;
683 char *bp_out = buf;
/* Emit every piece that is completed by the data now in BUF.  */
684 while (0 < to_write && to_write <= n_read)
686 if (filter_ok || new_file_flag)
687 filter_ok = cwrite (new_file_flag, bp_out, to_write);
688 opened += new_file_flag;
689 new_file_flag = !max_files || (opened < max_files);
690 if (! filter_ok && ! new_file_flag)
692 /* If filters no longer accepting input, stop reading. */
693 n_read = 0;
694 eof = true;
695 break;
697 bp_out += to_write;
698 n_read -= to_write;
699 to_write = n_bytes + (opened < rem_bytes);
/* Whatever remains in BUF is the start of a piece that continues in
   the next buffer.  */
701 if (0 < n_read)
703 if (filter_ok || new_file_flag)
704 filter_ok = cwrite (new_file_flag, bp_out, n_read);
705 opened += new_file_flag;
706 new_file_flag = false;
707 if (! filter_ok && opened == max_files)
709 /* If filters no longer accepting input, stop reading. */
710 break;
712 to_write -= n_read;
716 /* Ensure NUMBER files are created, which truncates
717 any existing files or notifies any consumers on fifos.
718 FIXME: Should we do this before EXIT_FAILURE? */
719 while (opened++ < max_files)
720 cwrite (true, nullptr, 0);
723 /* Split into pieces of exactly N_LINES lines.
724 Use buffer BUF, whose size is BUFSIZE. */
726 static void
727 lines_split (intmax_t n_lines, char *buf, idx_t bufsize)
729 ssize_t n_read;
730 char *bp, *bp_out, *eob;
731 bool new_file_flag = true;
732 intmax_t n = 0;
736 n_read = read (STDIN_FILENO, buf, bufsize);
737 if (n_read < 0)
738 error (EXIT_FAILURE, errno, "%s", quotef (infile));
739 bp = bp_out = buf;
740 eob = bp + n_read;
741 *eob = eolchar;
742 while (true)
744 bp = rawmemchr (bp, eolchar);
745 if (bp == eob)
747 if (eob != bp_out) /* do not write 0 bytes! */
749 idx_t len = eob - bp_out;
750 cwrite (new_file_flag, bp_out, len);
751 new_file_flag = false;
753 break;
756 ++bp;
757 if (++n >= n_lines)
759 cwrite (new_file_flag, bp_out, bp - bp_out);
760 bp_out = bp;
761 new_file_flag = true;
762 n = 0;
766 while (n_read);
769 /* Split into pieces that are as large as possible while still not more
770 than N_BYTES bytes, and are split on line boundaries except
771 where lines longer than N_BYTES bytes occur. */
773 static void
774 line_bytes_split (intmax_t n_bytes, char *buf, idx_t bufsize)
776 ssize_t n_read;
777 intmax_t n_out = 0; /* for each split. */
778 idx_t n_hold = 0;
779 char *hold = nullptr; /* for lines > bufsize. */
780 idx_t hold_size = 0;
781 bool split_line = false; /* Whether a \n was output in a split. */
785 n_read = read (STDIN_FILENO, buf, bufsize);
786 if (n_read < 0)
787 error (EXIT_FAILURE, errno, "%s", quotef (infile));
788 idx_t n_left = n_read;
789 char *sob = buf;
790 while (n_left)
792 idx_t split_rest = 0;
793 char *eoc = nullptr;
794 char *eol;
796 /* Determine End Of Chunk and/or End of Line,
797 which are used below to select what to write or buffer. */
798 if (n_bytes - n_out - n_hold <= n_left)
800 /* Have enough for split. */
801 split_rest = n_bytes - n_out - n_hold;
802 eoc = sob + split_rest - 1;
803 eol = memrchr (sob, eolchar, split_rest);
805 else
806 eol = memrchr (sob, eolchar, n_left);
808 /* Output hold space if possible. */
809 if (n_hold && !(!eol && n_out))
811 cwrite (n_out == 0, hold, n_hold);
812 n_out += n_hold;
813 if (n_hold > bufsize)
814 hold = xirealloc (hold, bufsize);
815 n_hold = 0;
816 hold_size = bufsize;
819 /* Output to eol if present. */
820 if (eol)
822 split_line = true;
823 idx_t n_write = eol - sob + 1;
824 cwrite (n_out == 0, sob, n_write);
825 n_out += n_write;
826 n_left -= n_write;
827 sob += n_write;
828 if (eoc)
829 split_rest -= n_write;
832 /* Output to eoc or eob if possible. */
833 if (n_left && !split_line)
835 idx_t n_write = eoc ? split_rest : n_left;
836 cwrite (n_out == 0, sob, n_write);
837 n_out += n_write;
838 n_left -= n_write;
839 sob += n_write;
840 if (eoc)
841 split_rest -= n_write;
844 /* Update hold if needed. */
845 if ((eoc && split_rest) || (!eoc && n_left))
847 idx_t n_buf = eoc ? split_rest : n_left;
848 if (hold_size - n_hold < n_buf)
849 hold = xpalloc (hold, &hold_size, n_buf - (hold_size - n_hold),
850 -1, sizeof *hold);
851 memcpy (hold + n_hold, sob, n_buf);
852 n_hold += n_buf;
853 n_left -= n_buf;
854 sob += n_buf;
857 /* Reset for new split. */
858 if (eoc)
860 n_out = 0;
861 split_line = false;
865 while (n_read);
867 /* Handle no eol at end of file. */
868 if (n_hold)
869 cwrite (n_out == 0, hold, n_hold);
871 free (hold);
874 /* -n l/[K/]N: Write lines to files of approximately file size / N.
875 The file is partitioned into file size / N sized portions, with the
876 last assigned any excess. If a line _starts_ within a partition
877 it is written completely to the corresponding file. Since lines
878 are not split even if they overlap a partition, the files written
879 can be larger or smaller than the partition size, and even empty
880 if a line is so long as to completely overlap the partition. */
/* When K is zero every chunk is written to its own output file through
   cwrite; when K is nonzero only chunk K is produced, written directly
   to standard output, and the function returns once that chunk is done.
   BUF, of size BUFSIZE, is the I/O buffer; if INITIAL_READ is
   nonnegative BUF already holds that many leading input bytes.
   FILE_SIZE is the total input size used to compute chunk boundaries.  */
882 static void
883 lines_chunk_split (intmax_t k, intmax_t n, char *buf, idx_t bufsize,
884 ssize_t initial_read, off_t file_size)
886 affirm (n && k <= n);
/* The first REM_BYTES chunks are one byte larger than CHUNK_SIZE.  */
888 intmax_t rem_bytes = file_size % n;
889 off_t chunk_size = file_size / n;
890 intmax_t chunk_no = 1;
891 off_t chunk_end = chunk_size + (0 < rem_bytes);
892 off_t n_written = 0;
893 bool new_file_flag = true;
894 bool chunk_truncated = false;
896 if (k > 1 && 0 < file_size)
898 /* Start reading 1 byte before kth chunk of file. */
899 off_t start = (k - 1) * chunk_size + MIN (k - 1, rem_bytes) - 1;
900 if (start < initial_read)
902 memmove (buf, buf + start, initial_read - start);
903 initial_read -= start;
905 else
907 if (initial_read < start
908 && lseek (STDIN_FILENO, start - initial_read, SEEK_CUR) < 0)
909 error (EXIT_FAILURE, errno, "%s", quotef (infile));
910 initial_read = -1;
/* Account for the skipped bytes as if they had been written, and treat
   the byte at START as the last byte of chunk K - 1.  */
912 n_written = start;
913 chunk_no = k - 1;
914 chunk_end = start + 1;
917 while (n_written < file_size)
919 char *bp = buf, *eob;
920 ssize_t n_read;
921 if (0 <= initial_read)
923 n_read = initial_read;
924 initial_read = -1;
926 else
928 n_read = read (STDIN_FILENO, buf,
929 MIN (bufsize, file_size - n_written));
930 if (n_read < 0)
931 error (EXIT_FAILURE, errno, "%s", quotef (infile));
933 if (n_read == 0)
934 break; /* eof. */
935 chunk_truncated = false;
936 eob = buf + n_read;
938 while (bp != eob)
940 idx_t to_write;
/* Whether a separator was found, i.e. the data up to BP_OUT ends a line.  */
941 bool next = false;
943 /* Begin looking for '\n' at last byte of chunk. */
944 off_t skip = MIN (n_read, MAX (0, chunk_end - 1 - n_written));
945 char *bp_out = memchr (bp + skip, eolchar, n_read - skip);
946 if (bp_out)
948 bp_out++;
949 next = true;
951 else
952 bp_out = eob;
953 to_write = bp_out - bp;
955 if (k == chunk_no)
957 /* We don't use the stdout buffer here since we're writing
958 large chunks from an existing file, so it's more efficient
959 to write out directly. */
960 if (full_write (STDOUT_FILENO, bp, to_write) != to_write)
961 write_error ();
963 else if (! k)
964 cwrite (new_file_flag, bp, to_write);
965 n_written += to_write;
966 bp += to_write;
967 n_read -= to_write;
968 new_file_flag = next;
970 /* A line could have been so long that it skipped
971 entire chunks. So create empty files in that case. */
972 while (next || chunk_end <= n_written)
974 if (!next && bp == eob)
976 /* replenish buf, before going to next chunk. */
977 chunk_truncated = true;
978 break;
980 if (k == chunk_no)
981 return;
982 chunk_end += chunk_size + (chunk_no < rem_bytes);
983 chunk_no++;
984 if (chunk_end <= n_written)
986 if (! k)
987 cwrite (true, nullptr, 0);
989 else
990 next = false;
/* If input ended right after the inner loop broke out to refill BUF,
   CHUNK_NO was not advanced for the chunk just finished; do it now.  */
995 if (chunk_truncated)
996 chunk_no++;
998 /* Ensure NUMBER files are created, which truncates
999 any existing files or notifies any consumers on fifos.
1000 FIXME: Should we do this before EXIT_FAILURE? */
1001 if (!k)
1002 while (chunk_no++ <= n)
1003 cwrite (true, nullptr, 0);
1006 /* -n K/N: Extract Kth of N chunks. */
1008 static void
1009 bytes_chunk_extract (intmax_t k, intmax_t n, char *buf, idx_t bufsize,
1010 ssize_t initial_read, off_t file_size)
1012 off_t start;
1013 off_t end;
1015 assert (0 < k && k <= n);
1017 start = (k - 1) * (file_size / n) + MIN (k - 1, file_size % n);
1018 end = k == n ? file_size : k * (file_size / n) + MIN (k, file_size % n);
1020 if (start < initial_read)
1022 memmove (buf, buf + start, initial_read - start);
1023 initial_read -= start;
1025 else
1027 if (initial_read < start
1028 && lseek (STDIN_FILENO, start - initial_read, SEEK_CUR) < 0)
1029 error (EXIT_FAILURE, errno, "%s", quotef (infile));
1030 initial_read = -1;
1033 while (start < end)
1035 ssize_t n_read;
1036 if (0 <= initial_read)
1038 n_read = initial_read;
1039 initial_read = -1;
1041 else
1043 n_read = read (STDIN_FILENO, buf, bufsize);
1044 if (n_read < 0)
1045 error (EXIT_FAILURE, errno, "%s", quotef (infile));
1047 if (n_read == 0)
1048 break; /* eof. */
1049 n_read = MIN (n_read, end - start);
1050 if (full_write (STDOUT_FILENO, buf, n_read) != n_read
1051 && ! ignorable (errno))
1052 error (EXIT_FAILURE, errno, "%s", quotef ("-"));
1053 start += n_read;
/* State kept for one output file in round robin mode.  */
typedef struct of_info
{
  char *of_name;  /* Name of the output file.  */
  int ofd;        /* Its descriptor, or a negative OFD_* marker.  */
  FILE *ofile;    /* Stream opened on OFD, or null while closed.  */
  pid_t opid;     /* PID of the filter writing this output, or 0.  */
} of_t;
/* Negative values stored in an of_t's 'ofd' member while the file has no
   open descriptor.  */
enum
{
  /* Not opened yet: the file is to be created.  */
  OFD_NEW = -1,
  /* Opened earlier and since closed: reopen it for appending.  */
  OFD_APPEND = -2
};
1071 /* Rotate file descriptors when we're writing to more output files than we
1072 have available file descriptors.
1073 Return whether we came under file resource pressure.
1074 If so, it's probably best to close each file when finished with it. */
1076 static bool
1077 ofile_open (of_t *files, idx_t i_check, idx_t nfiles)
1079 bool file_limit = false;
1081 if (files[i_check].ofd <= OFD_NEW)
1083 int fd;
1084 idx_t i_reopen = i_check ? i_check - 1 : nfiles - 1;
1086 /* Another process could have opened a file in between the calls to
1087 close and open, so we should keep trying until open succeeds or
1088 we've closed all of our files. */
1089 while (true)
1091 if (files[i_check].ofd == OFD_NEW)
1092 fd = create (files[i_check].of_name);
1093 else /* OFD_APPEND */
1095 /* Attempt to append to previously opened file.
1096 We use O_NONBLOCK to support writing to fifos,
1097 where the other end has closed because of our
1098 previous close. In that case we'll immediately
1099 get an error, rather than waiting indefinitely.
1100 In specialised cases the consumer can keep reading
1101 from the fifo, terminating on conditions in the data
1102 itself, or perhaps never in the case of 'tail -f'.
1103 I.e., for fifos it is valid to attempt this reopen.
1105 We don't handle the filter_command case here, as create()
1106 will exit if there are not enough files in that case.
1107 I.e., we don't support restarting filters, as that would
1108 put too much burden on users specifying --filter commands. */
1109 fd = open (files[i_check].of_name,
1110 O_WRONLY | O_BINARY | O_APPEND | O_NONBLOCK);
1113 if (0 <= fd)
1114 break;
1116 if (!(errno == EMFILE || errno == ENFILE))
1117 error (EXIT_FAILURE, errno, "%s", quotef (files[i_check].of_name));
1119 file_limit = true;
1121 /* Search backwards for an open file to close. */
1122 while (files[i_reopen].ofd < 0)
1124 i_reopen = i_reopen ? i_reopen - 1 : nfiles - 1;
1125 /* No more open files to close, exit with E[NM]FILE. */
1126 if (i_reopen == i_check)
1127 error (EXIT_FAILURE, errno, "%s",
1128 quotef (files[i_check].of_name));
1131 if (fclose (files[i_reopen].ofile) != 0)
1132 error (EXIT_FAILURE, errno, "%s", quotef (files[i_reopen].of_name));
1133 files[i_reopen].ofile = nullptr;
1134 files[i_reopen].ofd = OFD_APPEND;
1137 files[i_check].ofd = fd;
1138 FILE *ofile = fdopen (fd, "a");
1139 if (!ofile)
1140 error (EXIT_FAILURE, errno, "%s", quotef (files[i_check].of_name));
1141 files[i_check].ofile = ofile;
1142 files[i_check].opid = filter_pid;
1143 filter_pid = 0;
1146 return file_limit;
1149 /* -n r/[K/]N: Divide file into N chunks in round robin fashion.
1150 Use BUF of size BUFSIZE for the buffer, and if allocating storage
1151 put its address into *FILESP to pacify -fsanitize=leak.
1152 When K == 0, we try to keep the files open in parallel.
1153 If we run out of file resources, then we revert
1154 to opening and closing each file for each line. */
1156 static void
1157 lines_rr (intmax_t k, intmax_t n, char *buf, idx_t bufsize, of_t **filesp)
1159 bool wrapped = false;
1160 bool wrote = false;
1161 bool file_limit;
1162 idx_t i_file;
1163 of_t *files IF_LINT (= nullptr);
1164 intmax_t line_no;
1166 if (k)
1167 line_no = 1;
1168 else
1170 if (IDX_MAX < n)
1171 xalloc_die ();
1172 files = *filesp = xinmalloc (n, sizeof *files);
1174 /* Generate output file names. */
1175 for (i_file = 0; i_file < n; i_file++)
1177 next_file_name ();
1178 files[i_file].of_name = xstrdup (outfile);
1179 files[i_file].ofd = OFD_NEW;
1180 files[i_file].ofile = nullptr;
1181 files[i_file].opid = 0;
1183 i_file = 0;
1184 file_limit = false;
1187 while (true)
1189 char *bp = buf, *eob;
1190 ssize_t n_read = read (STDIN_FILENO, buf, bufsize);
1191 if (n_read < 0)
1192 error (EXIT_FAILURE, errno, "%s", quotef (infile));
1193 else if (n_read == 0)
1194 break; /* eof. */
1195 eob = buf + n_read;
1197 while (bp != eob)
1199 idx_t to_write;
1200 bool next = false;
1202 /* Find end of line. */
1203 char *bp_out = memchr (bp, eolchar, eob - bp);
1204 if (bp_out)
1206 bp_out++;
1207 next = true;
1209 else
1210 bp_out = eob;
1211 to_write = bp_out - bp;
1213 if (k)
1215 if (line_no == k && unbuffered)
1217 if (full_write (STDOUT_FILENO, bp, to_write) != to_write)
1218 write_error ();
1220 else if (line_no == k && fwrite (bp, to_write, 1, stdout) != 1)
1222 write_error ();
1224 if (next)
1225 line_no = (line_no == n) ? 1 : line_no + 1;
1227 else
1229 /* Secure file descriptor. */
1230 file_limit |= ofile_open (files, i_file, n);
1231 if (unbuffered)
1233 /* Note writing to fd, rather than flushing the FILE gives
1234 an 8% performance benefit, due to reduced data copying. */
1235 if (full_write (files[i_file].ofd, bp, to_write) != to_write
1236 && ! ignorable (errno))
1237 error (EXIT_FAILURE, errno, "%s",
1238 quotef (files[i_file].of_name));
1240 else if (fwrite (bp, to_write, 1, files[i_file].ofile) != 1
1241 && ! ignorable (errno))
1242 error (EXIT_FAILURE, errno, "%s",
1243 quotef (files[i_file].of_name));
1245 if (! ignorable (errno))
1246 wrote = true;
1248 if (file_limit)
1250 if (fclose (files[i_file].ofile) != 0)
1251 error (EXIT_FAILURE, errno, "%s",
1252 quotef (files[i_file].of_name));
1253 files[i_file].ofile = nullptr;
1254 files[i_file].ofd = OFD_APPEND;
1256 if (next && ++i_file == n)
1258 wrapped = true;
1259 /* If no filters are accepting input, stop reading. */
1260 if (! wrote)
1261 goto no_filters;
1262 wrote = false;
1263 i_file = 0;
1267 bp = bp_out;
1271 no_filters:
1272 /* Ensure all files created, so that any existing files are truncated,
1273 and to signal any waiting fifo consumers.
1274 Also, close any open file descriptors.
1275 FIXME: Should we do this before EXIT_FAILURE? */
1276 if (!k)
1278 idx_t ceiling = wrapped ? n : i_file;
1279 for (i_file = 0; i_file < n; i_file++)
1281 if (i_file >= ceiling && !elide_empty_files)
1282 file_limit |= ofile_open (files, i_file, n);
1283 if (files[i_file].ofd >= 0)
1284 closeout (files[i_file].ofile, files[i_file].ofd,
1285 files[i_file].opid, files[i_file].of_name);
1286 files[i_file].ofd = OFD_APPEND;
/* Diagnose conflicting split-mode options, then call usage with
   EXIT_FAILURE.  Wrapped in do/while (0) so that an invocation
   followed by a semicolon acts as a single statement.  */
#define FAIL_ONLY_ONE_WAY() \
  do { \
    error (0, 0, _("cannot split in more than one way")); \
    usage (EXIT_FAILURE); \
  } while (0)
/* Report a string-to-integer conversion failure and exit.
   MSGID is the untranslated diagnostic and ARG is the offending text.
   An errno value of EINVAL is reported without an errno description;
   any other errno value is passed through to error.  */

static _Noreturn void
strtoint_die (char const *msgid, char const *arg)
{
  int errnum = errno;
  if (errnum == EINVAL)
    errnum = 0;
  error (EXIT_FAILURE, errnum, "%s: %s", gettext (msgid), quote (arg));
}
/* Threshold for comparing strtol_error results when LONGINT_OVERFLOW
   may be ignored: the saturated extreme value will do the right thing
   anyway on any practical platform.  */
#define OVERFLOW_OK LONGINT_OVERFLOW
1312 /* Parse ARG for number of bytes or lines. The number can be followed
1313 by MULTIPLIERS, and the resulting value must be positive.
1314 If the number cannot be parsed, diagnose with MSG.
1315 Return the number parsed, or an INTMAX_MAX on overflow. */
1317 static intmax_t
1318 parse_n_units (char const *arg, char const *multipliers, char const *msgid)
1320 intmax_t n;
1321 if (OVERFLOW_OK < xstrtoimax (arg, nullptr, 10, &n, multipliers) || n < 1)
1322 strtoint_die (msgid, arg);
1323 return n;
1326 /* Parse K/N syntax of chunk options. */
1328 static void
1329 parse_chunk (intmax_t *k_units, intmax_t *n_units, char const *arg)
1331 char *argend;
1332 strtol_error e = xstrtoimax (arg, &argend, 10, n_units, "");
1333 if (e == LONGINT_INVALID_SUFFIX_CHAR && *argend == '/')
1335 *k_units = *n_units;
1336 *n_units = parse_n_units (argend + 1, "",
1337 N_("invalid number of chunks"));
1338 if (! (0 < *k_units && *k_units <= *n_units))
1339 error (EXIT_FAILURE, 0, "%s: %s", _("invalid chunk number"),
1340 quote_mem (arg, argend - arg));
1342 else if (! (e <= OVERFLOW_OK && 0 < *n_units))
1343 strtoint_die (N_("invalid number of chunks"), arg);
1348 main (int argc, char **argv)
1350 enum Split_type split_type = type_undef;
1351 idx_t in_blk_size = 0; /* optimal block size of input file device */
1352 idx_t page_size = getpagesize ();
1353 intmax_t k_units = 0;
1354 intmax_t n_units = 0;
1356 static char const multipliers[] = "bEGKkMmPQRTYZ0";
1357 int c;
1358 int digits_optind = 0;
1359 off_t file_size = OFF_T_MAX;
1361 initialize_main (&argc, &argv);
1362 set_program_name (argv[0]);
1363 setlocale (LC_ALL, "");
1364 bindtextdomain (PACKAGE, LOCALEDIR);
1365 textdomain (PACKAGE);
1367 atexit (close_stdout);
1369 /* Parse command line options. */
1371 infile = bad_cast ("-");
1372 outbase = bad_cast ("x");
1374 while (true)
1376 /* This is the argv-index of the option we will read next. */
1377 int this_optind = optind ? optind : 1;
1379 c = getopt_long (argc, argv, "0123456789C:a:b:del:n:t:ux",
1380 longopts, nullptr);
1381 if (c == -1)
1382 break;
1384 switch (c)
1386 case 'a':
1387 suffix_length = xdectoimax (optarg, 0, IDX_MAX,
1388 "", _("invalid suffix length"), 0);
1389 break;
1391 case ADDITIONAL_SUFFIX_OPTION:
1393 int suffix_len = strlen (optarg);
1394 if (last_component (optarg) != optarg
1395 || (suffix_len && ISSLASH (optarg[suffix_len - 1])))
1397 error (0, 0,
1398 _("invalid suffix %s, contains directory separator"),
1399 quote (optarg));
1400 usage (EXIT_FAILURE);
1403 additional_suffix = optarg;
1404 break;
1406 case 'b':
1407 if (split_type != type_undef)
1408 FAIL_ONLY_ONE_WAY ();
1409 split_type = type_bytes;
1410 n_units = parse_n_units (optarg, multipliers,
1411 N_("invalid number of bytes"));
1412 break;
1414 case 'l':
1415 if (split_type != type_undef)
1416 FAIL_ONLY_ONE_WAY ();
1417 split_type = type_lines;
1418 n_units = parse_n_units (optarg, "", N_("invalid number of lines"));
1419 break;
1421 case 'C':
1422 if (split_type != type_undef)
1423 FAIL_ONLY_ONE_WAY ();
1424 split_type = type_byteslines;
1425 n_units = parse_n_units (optarg, multipliers,
1426 N_("invalid number of lines"));
1427 break;
1429 case 'n':
1430 if (split_type != type_undef)
1431 FAIL_ONLY_ONE_WAY ();
1432 /* skip any whitespace */
1433 while (isspace (to_uchar (*optarg)))
1434 optarg++;
1435 if (STRNCMP_LIT (optarg, "r/") == 0)
1437 split_type = type_rr;
1438 optarg += 2;
1440 else if (STRNCMP_LIT (optarg, "l/") == 0)
1442 split_type = type_chunk_lines;
1443 optarg += 2;
1445 else
1446 split_type = type_chunk_bytes;
1447 parse_chunk (&k_units, &n_units, optarg);
1448 break;
1450 case 'u':
1451 unbuffered = true;
1452 break;
1454 case 't':
1456 char neweol = optarg[0];
1457 if (! neweol)
1458 error (EXIT_FAILURE, 0, _("empty record separator"));
1459 if (optarg[1])
1461 if (STREQ (optarg, "\\0"))
1462 neweol = '\0';
1463 else
1465 /* Provoke with 'split -txx'. Complain about
1466 "multi-character tab" instead of "multibyte tab", so
1467 that the diagnostic's wording does not need to be
1468 changed once multibyte characters are supported. */
1469 error (EXIT_FAILURE, 0, _("multi-character separator %s"),
1470 quote (optarg));
1473 /* Make it explicit we don't support multiple separators. */
1474 if (0 <= eolchar && neweol != eolchar)
1476 error (EXIT_FAILURE, 0,
1477 _("multiple separator characters specified"));
1480 eolchar = neweol;
1482 break;
1484 case '0':
1485 case '1':
1486 case '2':
1487 case '3':
1488 case '4':
1489 case '5':
1490 case '6':
1491 case '7':
1492 case '8':
1493 case '9':
1494 if (split_type == type_undef)
1496 split_type = type_digits;
1497 n_units = 0;
1499 if (split_type != type_undef && split_type != type_digits)
1500 FAIL_ONLY_ONE_WAY ();
1501 if (digits_optind != 0 && digits_optind != this_optind)
1502 n_units = 0; /* More than one number given; ignore other. */
1503 digits_optind = this_optind;
1504 if (ckd_mul (&n_units, n_units, 10)
1505 || ckd_add (&n_units, n_units, c - '0'))
1506 n_units = INTMAX_MAX;
1507 break;
1509 case 'd':
1510 case 'x':
1511 if (c == 'd')
1512 suffix_alphabet = "0123456789";
1513 else
1514 suffix_alphabet = "0123456789abcdef";
1515 if (optarg)
1517 if (strlen (optarg) != strspn (optarg, suffix_alphabet))
1519 error (0, 0,
1520 (c == 'd') ?
1521 _("%s: invalid start value for numerical suffix") :
1522 _("%s: invalid start value for hexadecimal suffix"),
1523 quote (optarg));
1524 usage (EXIT_FAILURE);
1526 else
1528 /* Skip any leading zero. */
1529 while (*optarg == '0' && *(optarg + 1) != '\0')
1530 optarg++;
1531 numeric_suffix_start = optarg;
1534 break;
1536 case 'e':
1537 elide_empty_files = true;
1538 break;
1540 case FILTER_OPTION:
1541 filter_command = optarg;
1542 break;
1544 case IO_BLKSIZE_OPTION:
1545 in_blk_size = xdectoumax (optarg, 1,
1546 MIN (SYS_BUFSIZE_MAX,
1547 MIN (IDX_MAX, SIZE_MAX) - 1),
1548 multipliers, _("invalid IO block size"), 0);
1549 break;
1551 case VERBOSE_OPTION:
1552 verbose = true;
1553 break;
1555 case_GETOPT_HELP_CHAR;
1557 case_GETOPT_VERSION_CHAR (PROGRAM_NAME, AUTHORS);
1559 default:
1560 usage (EXIT_FAILURE);
1564 if (k_units != 0 && filter_command)
1566 error (0, 0, _("--filter does not process a chunk extracted to stdout"));
1567 usage (EXIT_FAILURE);
1570 /* Handle default case. */
1571 if (split_type == type_undef)
1573 split_type = type_lines;
1574 n_units = 1000;
1577 if (n_units == 0)
1579 error (0, 0, _("invalid number of lines: %s"), quote ("0"));
1580 usage (EXIT_FAILURE);
1583 if (eolchar < 0)
1584 eolchar = '\n';
1586 set_suffix_length (n_units, split_type);
1588 /* Get out the filename arguments. */
1590 if (optind < argc)
1591 infile = argv[optind++];
1593 if (optind < argc)
1594 outbase = argv[optind++];
1596 if (optind < argc)
1598 error (0, 0, _("extra operand %s"), quote (argv[optind]));
1599 usage (EXIT_FAILURE);
1602 /* Check that the suffix length is large enough for the numerical
1603 suffix start value. */
1604 if (numeric_suffix_start && strlen (numeric_suffix_start) > suffix_length)
1606 error (0, 0, _("numerical suffix start value is too large "
1607 "for the suffix length"));
1608 usage (EXIT_FAILURE);
1611 /* Open the input file. */
1612 if (! STREQ (infile, "-")
1613 && fd_reopen (STDIN_FILENO, infile, O_RDONLY, 0) < 0)
1614 error (EXIT_FAILURE, errno, _("cannot open %s for reading"),
1615 quoteaf (infile));
1617 /* Binary I/O is safer when byte counts are used. */
1618 xset_binary_mode (STDIN_FILENO, O_BINARY);
1620 /* Advise the kernel of our access pattern. */
1621 fdadvise (STDIN_FILENO, 0, 0, FADVISE_SEQUENTIAL);
1623 /* Get the optimal block size of input device and make a buffer. */
1625 if (fstat (STDIN_FILENO, &in_stat_buf) != 0)
1626 error (EXIT_FAILURE, errno, "%s", quotef (infile));
1628 if (in_blk_size == 0)
1630 in_blk_size = io_blksize (in_stat_buf);
1631 if (SYS_BUFSIZE_MAX < in_blk_size)
1632 in_blk_size = SYS_BUFSIZE_MAX;
1635 char *buf = xalignalloc (page_size, in_blk_size + 1);
1636 ssize_t initial_read = -1;
1638 if (split_type == type_chunk_bytes || split_type == type_chunk_lines)
1640 file_size = input_file_size (STDIN_FILENO, &in_stat_buf,
1641 buf, in_blk_size);
1642 if (file_size < 0)
1643 error (EXIT_FAILURE, errno, _("%s: cannot determine file size"),
1644 quotef (infile));
1645 initial_read = MIN (file_size, in_blk_size);
1648 /* When filtering, closure of one pipe must not terminate the process,
1649 as there may still be other streams expecting input from us. */
1650 if (filter_command)
1651 default_SIGPIPE = signal (SIGPIPE, SIG_IGN) == SIG_DFL;
1653 switch (split_type)
1655 case type_digits:
1656 case type_lines:
1657 lines_split (n_units, buf, in_blk_size);
1658 break;
1660 case type_bytes:
1661 bytes_split (n_units, 0, buf, in_blk_size, -1, 0);
1662 break;
1664 case type_byteslines:
1665 line_bytes_split (n_units, buf, in_blk_size);
1666 break;
1668 case type_chunk_bytes:
1669 if (k_units == 0)
1670 bytes_split (file_size / n_units, file_size % n_units,
1671 buf, in_blk_size, initial_read, n_units);
1672 else
1673 bytes_chunk_extract (k_units, n_units, buf, in_blk_size, initial_read,
1674 file_size);
1675 break;
1677 case type_chunk_lines:
1678 lines_chunk_split (k_units, n_units, buf, in_blk_size, initial_read,
1679 file_size);
1680 break;
1682 case type_rr:
1683 /* Note, this is like 'sed -n ${k}~${n}p' when k > 0,
1684 but the functionality is provided for symmetry. */
1686 of_t *files;
1687 lines_rr (k_units, n_units, buf, in_blk_size, &files);
1689 break;
1691 default:
1692 affirm (false);
1695 if (close (STDIN_FILENO) != 0)
1696 error (EXIT_FAILURE, errno, "%s", quotef (infile));
1697 closeout (nullptr, output_desc, filter_pid, outfile);
1699 main_exit (EXIT_SUCCESS);