1 /* split.c -- split a file into pieces.
2 Copyright (C) 1988-2016 Free Software Foundation, Inc.
4 This program is free software: you can redistribute it and/or modify
5 it under the terms of the GNU General Public License as published by
6 the Free Software Foundation, either version 3 of the License, or
7 (at your option) any later version.
9 This program is distributed in the hope that it will be useful,
10 but WITHOUT ANY WARRANTY; without even the implied warranty of
11 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 GNU General Public License for more details.
14 You should have received a copy of the GNU General Public License
15 along with this program. If not, see <http://www.gnu.org/licenses/>. */
/* By tege@sics.se, with rms.

   TODO:
   * support -p REGEX as in BSD's split.
   * support --suppress-matched as in csplit.  */
28 #include <sys/types.h>
33 #include "fd-reopen.h"
35 #include "full-write.h"
36 #include "ioblksize.h"
38 #include "safe-read.h"
41 #include "xdectoint.h"
44 /* The official name of this program (e.g., no 'g' prefix). */
45 #define PROGRAM_NAME "split"
48 proper_name_utf8 ("Torbjorn Granlund", "Torbj\303\266rn Granlund"), \
49 proper_name ("Richard M. Stallman")
/* Shell command that output is piped through instead of being
   written to files.  NULL when no filter was requested.  */
static const char *filter_command;

/* Process ID of the most recently started filter.  */
static int filter_pid;

/* Write ends of the pipes currently open to filter processes,
   together with the allocated capacity of the array and the
   number of entries in use.  */
static int *open_pipes;
static size_t open_pipes_alloc;
static size_t n_open_pipes;
62 /* Blocked signals. */
63 static sigset_t oldblocked
;
64 static sigset_t newblocked
;
66 /* Base name of output files. */
67 static char const *outbase
;
/* Name of output files.  Allocated and rewritten in place as each
   successive name is generated.  */
static char *outfile;

/* Pointer to the end of the prefix in OUTFILE.
   Suffixes are inserted here.  */
static char *outfile_mid;

/* Generate new suffix when suffixes are exhausted.  */
static bool suffix_auto = true;

/* Length of OUTFILE's suffix.  Zero until set by the user or
   computed from the requested number of files.  */
static size_t suffix_length;

/* Alphabet of characters to use in suffix.  */
static char const *suffix_alphabet = "abcdefghijklmnopqrstuvwxyz";

/* Numerical suffix start value, as a string of decimal digits;
   NULL when no start value was given.  */
static const char *numeric_suffix_start;

/* Additional suffix to append to output file names, or NULL.  */
static char const *additional_suffix;
/* Name of input file.  May be "-".  */
static char *infile;

/* stat buf for input file.  Compared against each output file so
   that the input is never overwritten.  */
static struct stat in_stat_buf;

/* Descriptor on which output file is open; -1 while none is open.  */
static int output_desc = -1;
/* If true, print a diagnostic on standard output just before each
   output file is opened.  */
/* When set, zero-length output files are not generated.  */
static bool elide_empty_files;

/* When set, round robin mode copies input to output immediately.
   This is much slower, so it is off by default.  */
static bool unbuffered;

/* The record separator character.  Stays at -1 until option
   parsing settles on a value (newline by default).  */
static int eolchar = -1;
/* The split mode to use.  */
enum Split_type
{
  type_undef,         /* no mode selected yet */
  type_bytes,         /* fixed number of bytes per piece */
  type_byteslines,    /* at most a number of bytes of whole lines */
  type_lines,         /* fixed number of lines per piece */
  type_digits,        /* line count given as bare digit options */
  type_chunk_bytes,   /* chunks by size: N or K/N */
  type_chunk_lines,   /* chunks without splitting lines: l/N */
  type_rr             /* round robin distribution: r/N */
};
/* For long options that have no equivalent short option, use a
   non-character as a pseudo short option, starting with CHAR_MAX + 1.
   Every value is above CHAR_MAX so it cannot collide with a real
   option character returned by getopt_long.  */
enum
{
  VERBOSE_OPTION = CHAR_MAX + 1,
  FILTER_OPTION,
  IO_BLKSIZE_OPTION,
  ADDITIONAL_SUFFIX_OPTION
};
131 static struct option
const longopts
[] =
133 {"bytes", required_argument
, NULL
, 'b'},
134 {"lines", required_argument
, NULL
, 'l'},
135 {"line-bytes", required_argument
, NULL
, 'C'},
136 {"number", required_argument
, NULL
, 'n'},
137 {"elide-empty-files", no_argument
, NULL
, 'e'},
138 {"unbuffered", no_argument
, NULL
, 'u'},
139 {"suffix-length", required_argument
, NULL
, 'a'},
140 {"additional-suffix", required_argument
, NULL
,
141 ADDITIONAL_SUFFIX_OPTION
},
142 {"numeric-suffixes", optional_argument
, NULL
, 'd'},
143 {"filter", required_argument
, NULL
, FILTER_OPTION
},
144 {"verbose", no_argument
, NULL
, VERBOSE_OPTION
},
145 {"separator", required_argument
, NULL
, 't'},
146 {"-io-blksize", required_argument
, NULL
,
147 IO_BLKSIZE_OPTION
}, /* do not document */
148 {GETOPT_HELP_OPTION_DECL
},
149 {GETOPT_VERSION_OPTION_DECL
},
153 /* Return true if the errno value, ERR, is ignorable. */
157 return filter_command
&& err
== EPIPE
;
161 set_suffix_length (uintmax_t n_units
, enum Split_type split_type
)
163 #define DEFAULT_SUFFIX_LENGTH 2
165 uintmax_t suffix_needed
= 0;
167 /* The suffix auto length feature is incompatible with
168 a user specified start value as the generated suffixes
169 are not all consecutive. */
170 if (numeric_suffix_start
)
173 /* Auto-calculate the suffix length if the number of files is given. */
174 if (split_type
== type_chunk_bytes
|| split_type
== type_chunk_lines
175 || split_type
== type_rr
)
177 uintmax_t n_units_end
= n_units
;
178 if (numeric_suffix_start
)
181 strtol_error e
= xstrtoumax (numeric_suffix_start
, NULL
, 10,
183 if (e
== LONGINT_OK
&& n_start
<= UINTMAX_MAX
- n_units
)
185 /* Restrict auto adjustment so we don't keep
186 incrementing a suffix size arbitrarily,
187 as that would break sort order for files
188 generated from multiple split runs. */
189 if (n_start
< n_units
)
190 n_units_end
+= n_start
;
194 size_t alphabet_len
= strlen (suffix_alphabet
);
195 bool alphabet_slop
= (n_units_end
% alphabet_len
) != 0;
196 while (n_units_end
/= alphabet_len
)
198 suffix_needed
+= alphabet_slop
;
202 if (suffix_length
) /* set by user */
204 if (suffix_length
< suffix_needed
)
206 error (EXIT_FAILURE
, 0,
207 _("the suffix length needs to be at least %"PRIuMAX
),
214 suffix_length
= MAX (DEFAULT_SUFFIX_LENGTH
, suffix_needed
);
220 if (status
!= EXIT_SUCCESS
)
225 Usage: %s [OPTION]... [FILE [PREFIX]]\n\
229 Output pieces of FILE to PREFIXaa, PREFIXab, ...;\n\
230 default size is 1000 lines, and default PREFIX is 'x'.\n\
234 emit_mandatory_arg_note ();
236 fprintf (stdout
, _("\
237 -a, --suffix-length=N generate suffixes of length N (default %d)\n\
238 --additional-suffix=SUFFIX append an additional SUFFIX to file names\n\
239 -b, --bytes=SIZE put SIZE bytes per output file\n\
240 -C, --line-bytes=SIZE put at most SIZE bytes of records per output file\n\
241 -d use numeric suffixes starting at 0, not alphabetic\n\
242 --numeric-suffixes[=FROM] same as -d, but allow setting the start value\
244 -e, --elide-empty-files do not generate empty output files with '-n'\n\
245 --filter=COMMAND write to shell COMMAND; file name is $FILE\n\
246 -l, --lines=NUMBER put NUMBER lines/records per output file\n\
247 -n, --number=CHUNKS generate CHUNKS output files; see explanation below\n\
248 -t, --separator=SEP use SEP instead of newline as the record separator;\n\
249 '\\0' (zero) specifies the NUL character\n\
250 -u, --unbuffered immediately copy input to output with '-n r/...'\n\
251 "), DEFAULT_SUFFIX_LENGTH
);
253 --verbose print a diagnostic just before each\n\
254 output file is opened\n\
256 fputs (HELP_OPTION_DESCRIPTION
, stdout
);
257 fputs (VERSION_OPTION_DESCRIPTION
, stdout
);
261 N split into N files based on size of input\n\
262 K/N output Kth of N to stdout\n\
263 l/N split into N files without splitting lines/records\n\
264 l/K/N output Kth of N to stdout without splitting lines/records\n\
265 r/N like 'l' but use round robin distribution\n\
266 r/K/N likewise but only output Kth of N to stdout\n\
268 emit_ancillary_info (PROGRAM_NAME
);
/* Return the number of bytes that can be read from FD, a file with
   apparent size SIZE.  Actually read the data into BUF (of size
   BUFSIZE) if the file appears to be smaller than BUFSIZE, as this
   works better on proc-like file systems.  If the returned value is
   less than BUFSIZE, store all the file's data into BUF; otherwise,
   restore the input file's position so that the file can be reread if
   needed.  */
282 input_file_size (int fd
, off_t size
, char *buf
, size_t bufsize
)
289 size_t save
= size
< bufsize
? size
: 0;
290 size_t n_read
= safe_read (fd
, buf
+ save
, bufsize
- save
);
293 if (n_read
== SAFE_READ_ERROR
)
294 error (EXIT_FAILURE
, errno
, "%s", quotef (infile
));
297 if (bufsize
<= size
&& lseek (fd
, - size
, SEEK_CUR
) < 0)
298 error (EXIT_FAILURE
, errno
, "%s", quotef (infile
));
/* Compute the next sequential output file name and store it into the
   string 'outfile'.  */
308 next_file_name (void)
310 /* Index in suffix_alphabet of each character in the suffix. */
311 static size_t *sufindex
;
312 static size_t outbase_length
;
313 static size_t outfile_length
;
314 static size_t addsuf_length
;
321 widen
= !! outfile_length
;
325 /* Allocate and initialize the first file name. */
327 outbase_length
= strlen (outbase
);
328 addsuf_length
= additional_suffix
? strlen (additional_suffix
) : 0;
329 outfile_length
= outbase_length
+ suffix_length
+ addsuf_length
;
333 /* Reallocate and initialize a new wider file name.
334 We do this by subsuming the unchanging part of
335 the generated suffix into the prefix (base), and
336 reinitializing the now one longer suffix. */
342 if (outfile_length
+ 1 < outbase_length
)
344 outfile
= xrealloc (outfile
, outfile_length
+ 1);
347 memcpy (outfile
, outbase
, outbase_length
);
350 /* Append the last alphabet character to the file name prefix. */
351 outfile
[outbase_length
] = suffix_alphabet
[sufindex
[0]];
355 outfile_mid
= outfile
+ outbase_length
;
356 memset (outfile_mid
, suffix_alphabet
[0], suffix_length
);
357 if (additional_suffix
)
358 memcpy (outfile_mid
+ suffix_length
, additional_suffix
, addsuf_length
);
359 outfile
[outfile_length
] = 0;
362 sufindex
= xcalloc (suffix_length
, sizeof *sufindex
);
364 if (numeric_suffix_start
)
368 /* Update the output file name. */
369 size_t i
= strlen (numeric_suffix_start
);
370 memcpy (outfile_mid
+ suffix_length
- i
, numeric_suffix_start
, i
);
372 /* Update the suffix index. */
373 size_t *sufindex_end
= sufindex
+ suffix_length
;
375 *--sufindex_end
= numeric_suffix_start
[i
] - '0';
378 #if ! _POSIX_NO_TRUNC && HAVE_PATHCONF && defined _PC_NAME_MAX
379 /* POSIX requires that if the output file name is too long for
380 its directory, 'split' must fail without creating any files.
381 This must be checked for explicitly on operating systems that
382 silently truncate file names. */
384 char *dir
= dir_name (outfile
);
385 long name_max
= pathconf (dir
, _PC_NAME_MAX
);
386 if (0 <= name_max
&& name_max
< base_len (last_component (outfile
)))
387 error (EXIT_FAILURE
, ENAMETOOLONG
, "%s", quotef (outfile
));
394 /* Increment the suffix in place, if possible. */
396 size_t i
= suffix_length
;
400 if (suffix_auto
&& i
== 0 && ! suffix_alphabet
[sufindex
[0] + 1])
402 outfile_mid
[i
] = suffix_alphabet
[sufindex
[i
]];
406 outfile_mid
[i
] = suffix_alphabet
[sufindex
[i
]];
408 error (EXIT_FAILURE
, 0, _("output file suffixes exhausted"));
412 /* Create or truncate a file. */
415 create (const char *name
)
420 fprintf (stdout
, _("creating file %s\n"), quoteaf (name
));
422 int fd
= open (name
, O_WRONLY
| O_CREAT
| O_BINARY
, MODE_RW_UGO
);
425 struct stat out_stat_buf
;
426 if (fstat (fd
, &out_stat_buf
) != 0)
427 error (EXIT_FAILURE
, errno
, _("failed to stat %s"), quoteaf (name
));
428 if (SAME_INODE (in_stat_buf
, out_stat_buf
))
429 error (EXIT_FAILURE
, 0, _("%s would overwrite input; aborting"),
431 if (ftruncate (fd
, 0) != 0)
432 error (EXIT_FAILURE
, errno
, _("%s: error truncating"), quotef (name
));
440 char const *shell_prog
= getenv ("SHELL");
441 if (shell_prog
== NULL
)
442 shell_prog
= "/bin/sh";
443 if (setenv ("FILE", name
, 1) != 0)
444 error (EXIT_FAILURE
, errno
,
445 _("failed to set FILE environment variable"));
447 fprintf (stdout
, _("executing with FILE=%s\n"), quotef (name
));
448 if (pipe (fd_pair
) != 0)
449 error (EXIT_FAILURE
, errno
, _("failed to create pipe"));
453 /* This is the child process. If an error occurs here, the
454 parent will eventually learn about it after doing a wait,
455 at which time it will emit its own error message. */
457 /* We have to close any pipes that were opened during an
458 earlier call, otherwise this process will be holding a
459 write-pipe that will prevent the earlier process from
460 reading an EOF on the corresponding read-pipe. */
461 for (j
= 0; j
< n_open_pipes
; ++j
)
462 if (close (open_pipes
[j
]) != 0)
463 error (EXIT_FAILURE
, errno
, _("closing prior pipe"));
464 if (close (fd_pair
[1]))
465 error (EXIT_FAILURE
, errno
, _("closing output pipe"));
466 if (fd_pair
[0] != STDIN_FILENO
)
468 if (dup2 (fd_pair
[0], STDIN_FILENO
) != STDIN_FILENO
)
469 error (EXIT_FAILURE
, errno
, _("moving input pipe"));
470 if (close (fd_pair
[0]) != 0)
471 error (EXIT_FAILURE
, errno
, _("closing input pipe"));
473 sigprocmask (SIG_SETMASK
, &oldblocked
, NULL
);
474 execl (shell_prog
, last_component (shell_prog
), "-c",
475 filter_command
, (char *) NULL
);
476 error (EXIT_FAILURE
, errno
, _("failed to run command: \"%s -c %s\""),
477 shell_prog
, filter_command
);
480 error (EXIT_FAILURE
, errno
, _("fork system call failed"));
481 if (close (fd_pair
[0]) != 0)
482 error (EXIT_FAILURE
, errno
, _("failed to close input pipe"));
483 filter_pid
= child_pid
;
484 if (n_open_pipes
== open_pipes_alloc
)
485 open_pipes
= x2nrealloc (open_pipes
, &open_pipes_alloc
,
487 open_pipes
[n_open_pipes
++] = fd_pair
[1];
492 /* Close the output file, and do any associated cleanup.
493 If FP and FD are both specified, they refer to the same open file;
494 in this case FP is closed, but FD is still used in cleanup. */
496 closeout (FILE *fp
, int fd
, pid_t pid
, char const *name
)
498 if (fp
!= NULL
&& fclose (fp
) != 0 && ! ignorable (errno
))
499 error (EXIT_FAILURE
, errno
, "%s", quotef (name
));
502 if (fp
== NULL
&& close (fd
) < 0)
503 error (EXIT_FAILURE
, errno
, "%s", quotef (name
));
505 for (j
= 0; j
< n_open_pipes
; ++j
)
507 if (open_pipes
[j
] == fd
)
509 open_pipes
[j
] = open_pipes
[--n_open_pipes
];
517 if (waitpid (pid
, &wstatus
, 0) == -1 && errno
!= ECHILD
)
518 error (EXIT_FAILURE
, errno
, _("waiting for child process"));
519 if (WIFSIGNALED (wstatus
))
521 int sig
= WTERMSIG (wstatus
);
524 char signame
[MAX (SIG2STR_MAX
, INT_BUFSIZE_BOUND (int))];
525 if (sig2str (sig
, signame
) != 0)
526 sprintf (signame
, "%d", sig
);
528 _("with FILE=%s, signal %s from command: %s"),
529 quotef (name
), signame
, filter_command
);
532 else if (WIFEXITED (wstatus
))
534 int ex
= WEXITSTATUS (wstatus
);
536 error (ex
, 0, _("with FILE=%s, exit %d from command: %s"),
537 quotef (name
), ex
, filter_command
);
541 /* shouldn't happen. */
542 error (EXIT_FAILURE
, 0,
543 _("unknown status from command (0x%X)"), wstatus
+ 0u);
548 /* Write BYTES bytes at BP to an output file.
549 If NEW_FILE_FLAG is true, open the next output file.
550 Otherwise add to the same output file already in use. */
553 cwrite (bool new_file_flag
, const char *bp
, size_t bytes
)
557 if (!bp
&& bytes
== 0 && elide_empty_files
)
559 closeout (NULL
, output_desc
, filter_pid
, outfile
);
561 if ((output_desc
= create (outfile
)) < 0)
562 error (EXIT_FAILURE
, errno
, "%s", quotef (outfile
));
564 if (full_write (output_desc
, bp
, bytes
) != bytes
&& ! ignorable (errno
))
565 error (EXIT_FAILURE
, errno
, "%s", quotef (outfile
));
/* Split into pieces of exactly N_BYTES bytes.
   Use buffer BUF, whose size is BUFSIZE.
   If INITIAL_READ != SIZE_MAX, the start of the input file has already
   been read into BUF, and BUF contains INITIAL_READ input bytes.  */
574 bytes_split (uintmax_t n_bytes
, char *buf
, size_t bufsize
, size_t initial_read
,
578 bool new_file_flag
= true;
580 uintmax_t to_write
= n_bytes
;
582 uintmax_t opened
= 0;
586 if (initial_read
!= SIZE_MAX
)
588 n_read
= initial_read
;
589 initial_read
= SIZE_MAX
;
593 n_read
= safe_read (STDIN_FILENO
, buf
, bufsize
);
594 if (n_read
== SAFE_READ_ERROR
)
595 error (EXIT_FAILURE
, errno
, "%s", quotef (infile
));
601 if (to_read
< to_write
)
603 if (to_read
) /* do not write 0 bytes! */
605 cwrite (new_file_flag
, bp_out
, to_read
);
606 opened
+= new_file_flag
;
608 new_file_flag
= false;
615 cwrite (new_file_flag
, bp_out
, w
);
616 opened
+= new_file_flag
;
617 new_file_flag
= !max_files
|| (opened
< max_files
);
618 if (!new_file_flag
&& ignorable (errno
))
620 /* If filter no longer accepting input, stop reading. */
632 /* Ensure NUMBER files are created, which truncates
633 any existing files or notifies any consumers on fifos.
634 FIXME: Should we do this before EXIT_FAILURE? */
635 while (opened
++ < max_files
)
636 cwrite (true, NULL
, 0);
639 /* Split into pieces of exactly N_LINES lines.
640 Use buffer BUF, whose size is BUFSIZE. */
643 lines_split (uintmax_t n_lines
, char *buf
, size_t bufsize
)
646 char *bp
, *bp_out
, *eob
;
647 bool new_file_flag
= true;
652 n_read
= safe_read (STDIN_FILENO
, buf
, bufsize
);
653 if (n_read
== SAFE_READ_ERROR
)
654 error (EXIT_FAILURE
, errno
, "%s", quotef (infile
));
660 bp
= memchr (bp
, eolchar
, eob
- bp
+ 1);
663 if (eob
!= bp_out
) /* do not write 0 bytes! */
665 size_t len
= eob
- bp_out
;
666 cwrite (new_file_flag
, bp_out
, len
);
667 new_file_flag
= false;
675 cwrite (new_file_flag
, bp_out
, bp
- bp_out
);
677 new_file_flag
= true;
685 /* Split into pieces that are as large as possible while still not more
686 than N_BYTES bytes, and are split on line boundaries except
687 where lines longer than N_BYTES bytes occur. */
690 line_bytes_split (uintmax_t n_bytes
, char *buf
, size_t bufsize
)
693 uintmax_t n_out
= 0; /* for each split. */
695 char *hold
= NULL
; /* for lines > bufsize. */
696 size_t hold_size
= 0;
697 bool split_line
= false; /* Whether a \n was output in a split. */
701 n_read
= safe_read (STDIN_FILENO
, buf
, bufsize
);
702 if (n_read
== SAFE_READ_ERROR
)
703 error (EXIT_FAILURE
, errno
, "%s", quotef (infile
));
704 size_t n_left
= n_read
;
708 size_t split_rest
= 0;
712 /* Determine End Of Chunk and/or End of Line,
713 which are used below to select what to write or buffer. */
714 if (n_bytes
- n_out
- n_hold
<= n_left
)
716 /* Have enough for split. */
717 split_rest
= n_bytes
- n_out
- n_hold
;
718 eoc
= sob
+ split_rest
- 1;
719 eol
= memrchr (sob
, eolchar
, split_rest
);
722 eol
= memrchr (sob
, eolchar
, n_left
);
724 /* Output hold space if possible. */
725 if (n_hold
&& !(!eol
&& n_out
))
727 cwrite (n_out
== 0, hold
, n_hold
);
729 if (n_hold
> bufsize
)
730 hold
= xrealloc (hold
, bufsize
);
735 /* Output to eol if present. */
739 size_t n_write
= eol
- sob
+ 1;
740 cwrite (n_out
== 0, sob
, n_write
);
745 split_rest
-= n_write
;
748 /* Output to eoc or eob if possible. */
749 if (n_left
&& !split_line
)
751 size_t n_write
= eoc
? split_rest
: n_left
;
752 cwrite (n_out
== 0, sob
, n_write
);
757 split_rest
-= n_write
;
760 /* Update hold if needed. */
761 if ((eoc
&& split_rest
) || (!eoc
&& n_left
))
763 size_t n_buf
= eoc
? split_rest
: n_left
;
764 if (hold_size
- n_hold
< n_buf
)
766 if (hold_size
<= SIZE_MAX
- bufsize
)
767 hold_size
+= bufsize
;
770 hold
= xrealloc (hold
, hold_size
);
772 memcpy (hold
+ n_hold
, sob
, n_buf
);
778 /* Reset for new split. */
788 /* Handle no eol at end of file. */
790 cwrite (n_out
== 0, hold
, n_hold
);
795 /* -n l/[K/]N: Write lines to files of approximately file size / N.
796 The file is partitioned into file size / N sized portions, with the
797 last assigned any excess. If a line _starts_ within a partition
798 it is written completely to the corresponding file. Since lines
799 are not split even if they overlap a partition, the files written
800 can be larger or smaller than the partition size, and even empty
801 if a line is so long as to completely overlap the partition. */
804 lines_chunk_split (uintmax_t k
, uintmax_t n
, char *buf
, size_t bufsize
,
805 size_t initial_read
, off_t file_size
)
807 assert (n
&& k
<= n
&& n
<= file_size
);
809 const off_t chunk_size
= file_size
/ n
;
810 uintmax_t chunk_no
= 1;
811 off_t chunk_end
= chunk_size
- 1;
813 bool new_file_flag
= true;
814 bool chunk_truncated
= false;
818 /* Start reading 1 byte before kth chunk of file. */
819 off_t start
= (k
- 1) * chunk_size
- 1;
820 if (initial_read
!= SIZE_MAX
)
822 memmove (buf
, buf
+ start
, initial_read
- start
);
823 initial_read
-= start
;
825 else if (lseek (STDIN_FILENO
, start
, SEEK_CUR
) < 0)
826 error (EXIT_FAILURE
, errno
, "%s", quotef (infile
));
829 chunk_end
= chunk_no
* chunk_size
- 1;
832 while (n_written
< file_size
)
834 char *bp
= buf
, *eob
;
836 if (initial_read
!= SIZE_MAX
)
838 n_read
= initial_read
;
839 initial_read
= SIZE_MAX
;
843 n_read
= safe_read (STDIN_FILENO
, buf
, bufsize
);
844 if (n_read
== SAFE_READ_ERROR
)
845 error (EXIT_FAILURE
, errno
, "%s", quotef (infile
));
849 n_read
= MIN (n_read
, file_size
- n_written
);
850 chunk_truncated
= false;
858 /* Begin looking for '\n' at last byte of chunk. */
859 off_t skip
= MIN (n_read
, MAX (0, chunk_end
- n_written
));
860 char *bp_out
= memchr (bp
+ skip
, eolchar
, n_read
- skip
);
865 to_write
= bp_out
- bp
;
869 /* We don't use the stdout buffer here since we're writing
870 large chunks from an existing file, so it's more efficient
871 to write out directly. */
872 if (full_write (STDOUT_FILENO
, bp
, to_write
) != to_write
)
873 error (EXIT_FAILURE
, errno
, "%s", _("write error"));
876 cwrite (new_file_flag
, bp
, to_write
);
877 n_written
+= to_write
;
880 new_file_flag
= next
;
882 /* A line could have been so long that it skipped
883 entire chunks. So create empty files in that case. */
884 while (next
|| chunk_end
<= n_written
- 1)
886 if (!next
&& bp
== eob
)
888 /* replenish buf, before going to next chunk. */
889 chunk_truncated
= true;
893 if (k
&& chunk_no
> k
)
896 chunk_end
= file_size
- 1; /* >= chunk_size. */
898 chunk_end
+= chunk_size
;
899 if (chunk_end
<= n_written
- 1)
902 cwrite (true, NULL
, 0);
913 /* Ensure NUMBER files are created, which truncates
914 any existing files or notifies any consumers on fifos.
915 FIXME: Should we do this before EXIT_FAILURE? */
916 while (!k
&& chunk_no
++ <= n
)
917 cwrite (true, NULL
, 0);
920 /* -n K/N: Extract Kth of N chunks. */
923 bytes_chunk_extract (uintmax_t k
, uintmax_t n
, char *buf
, size_t bufsize
,
924 size_t initial_read
, off_t file_size
)
929 assert (k
&& n
&& k
<= n
&& n
<= file_size
);
931 start
= (k
- 1) * (file_size
/ n
);
932 end
= (k
== n
) ? file_size
: k
* (file_size
/ n
);
934 if (initial_read
!= SIZE_MAX
)
936 memmove (buf
, buf
+ start
, initial_read
- start
);
937 initial_read
-= start
;
939 else if (lseek (STDIN_FILENO
, start
, SEEK_CUR
) < 0)
940 error (EXIT_FAILURE
, errno
, "%s", quotef (infile
));
945 if (initial_read
!= SIZE_MAX
)
947 n_read
= initial_read
;
948 initial_read
= SIZE_MAX
;
952 n_read
= safe_read (STDIN_FILENO
, buf
, bufsize
);
953 if (n_read
== SAFE_READ_ERROR
)
954 error (EXIT_FAILURE
, errno
, "%s", quotef (infile
));
958 n_read
= MIN (n_read
, end
- start
);
959 if (full_write (STDOUT_FILENO
, buf
, n_read
) != n_read
960 && ! ignorable (errno
))
961 error (EXIT_FAILURE
, errno
, "%s", quotef ("-"));
966 typedef struct of_info
980 /* Rotate file descriptors when we're writing to more output files than we
981 have available file descriptors.
982 Return whether we came under file resource pressure.
983 If so, it's probably best to close each file when finished with it. */
986 ofile_open (of_t
*files
, size_t i_check
, size_t nfiles
)
988 bool file_limit
= false;
990 if (files
[i_check
].ofd
<= OFD_NEW
)
993 size_t i_reopen
= i_check
? i_check
- 1 : nfiles
- 1;
995 /* Another process could have opened a file in between the calls to
996 close and open, so we should keep trying until open succeeds or
997 we've closed all of our files. */
1000 if (files
[i_check
].ofd
== OFD_NEW
)
1001 fd
= create (files
[i_check
].of_name
);
1002 else /* OFD_APPEND */
1004 /* Attempt to append to previously opened file.
1005 We use O_NONBLOCK to support writing to fifos,
1006 where the other end has closed because of our
1007 previous close. In that case we'll immediately
1008 get an error, rather than waiting indefinitely.
1009 In specialised cases the consumer can keep reading
1010 from the fifo, terminating on conditions in the data
1011 itself, or perhaps never in the case of 'tail -f'.
1012 I.e., for fifos it is valid to attempt this reopen.
1014 We don't handle the filter_command case here, as create()
1015 will exit if there are not enough files in that case.
1016 I.e., we don't support restarting filters, as that would
1017 put too much burden on users specifying --filter commands. */
1018 fd
= open (files
[i_check
].of_name
,
1019 O_WRONLY
| O_BINARY
| O_APPEND
| O_NONBLOCK
);
1025 if (!(errno
== EMFILE
|| errno
== ENFILE
))
1026 error (EXIT_FAILURE
, errno
, "%s", quotef (files
[i_check
].of_name
));
1030 /* Search backwards for an open file to close. */
1031 while (files
[i_reopen
].ofd
< 0)
1033 i_reopen
= i_reopen
? i_reopen
- 1 : nfiles
- 1;
1034 /* No more open files to close, exit with E[NM]FILE. */
1035 if (i_reopen
== i_check
)
1036 error (EXIT_FAILURE
, errno
, "%s",
1037 quotef (files
[i_check
].of_name
));
1040 if (fclose (files
[i_reopen
].ofile
) != 0)
1041 error (EXIT_FAILURE
, errno
, "%s", quotef (files
[i_reopen
].of_name
));
1042 files
[i_reopen
].ofile
= NULL
;
1043 files
[i_reopen
].ofd
= OFD_APPEND
;
1046 files
[i_check
].ofd
= fd
;
1047 if (!(files
[i_check
].ofile
= fdopen (fd
, "a")))
1048 error (EXIT_FAILURE
, errno
, "%s", quotef (files
[i_check
].of_name
));
1049 files
[i_check
].opid
= filter_pid
;
1056 /* -n r/[K/]N: Divide file into N chunks in round robin fashion.
1057 When K == 0, we try to keep the files open in parallel.
1058 If we run out of file resources, then we revert
1059 to opening and closing each file for each line. */
1062 lines_rr (uintmax_t k
, uintmax_t n
, char *buf
, size_t bufsize
)
1064 bool wrapped
= false;
1068 of_t
*files
IF_LINT (= NULL
);
1077 files
= xnmalloc (n
, sizeof *files
);
1079 /* Generate output file names. */
1080 for (i_file
= 0; i_file
< n
; i_file
++)
1083 files
[i_file
].of_name
= xstrdup (outfile
);
1084 files
[i_file
].ofd
= OFD_NEW
;
1085 files
[i_file
].ofile
= NULL
;
1086 files
[i_file
].opid
= 0;
1094 char *bp
= buf
, *eob
;
1095 size_t n_read
= safe_read (STDIN_FILENO
, buf
, bufsize
);
1096 if (n_read
== SAFE_READ_ERROR
)
1097 error (EXIT_FAILURE
, errno
, "%s", quotef (infile
));
1098 else if (n_read
== 0)
1107 /* Find end of line. */
1108 char *bp_out
= memchr (bp
, eolchar
, eob
- bp
);
1116 to_write
= bp_out
- bp
;
1120 if (line_no
== k
&& unbuffered
)
1122 if (full_write (STDOUT_FILENO
, bp
, to_write
) != to_write
)
1123 error (EXIT_FAILURE
, errno
, "%s", _("write error"));
1125 else if (line_no
== k
&& fwrite (bp
, to_write
, 1, stdout
) != 1)
1127 clearerr (stdout
); /* To silence close_stdout(). */
1128 error (EXIT_FAILURE
, errno
, "%s", _("write error"));
1131 line_no
= (line_no
== n
) ? 1 : line_no
+ 1;
1135 /* Secure file descriptor. */
1136 file_limit
|= ofile_open (files
, i_file
, n
);
1139 /* Note writing to fd, rather than flushing the FILE gives
1140 an 8% performance benefit, due to reduced data copying. */
1141 if (full_write (files
[i_file
].ofd
, bp
, to_write
) != to_write
1142 && ! ignorable (errno
))
1144 error (EXIT_FAILURE
, errno
, "%s",
1145 quotef (files
[i_file
].of_name
));
1148 else if (fwrite (bp
, to_write
, 1, files
[i_file
].ofile
) != 1
1149 && ! ignorable (errno
))
1151 error (EXIT_FAILURE
, errno
, "%s",
1152 quotef (files
[i_file
].of_name
));
1154 if (! ignorable (errno
))
1159 if (fclose (files
[i_file
].ofile
) != 0)
1161 error (EXIT_FAILURE
, errno
, "%s",
1162 quotef (files
[i_file
].of_name
));
1164 files
[i_file
].ofile
= NULL
;
1165 files
[i_file
].ofd
= OFD_APPEND
;
1167 if (next
&& ++i_file
== n
)
1170 /* If no filters are accepting input, stop reading. */
1183 /* Ensure all files created, so that any existing files are truncated,
1184 and to signal any waiting fifo consumers.
1185 Also, close any open file descriptors.
1186 FIXME: Should we do this before EXIT_FAILURE? */
1189 int ceiling
= (wrapped
? n
: i_file
);
1190 for (i_file
= 0; i_file
< n
; i_file
++)
1192 if (i_file
>= ceiling
&& !elide_empty_files
)
1193 file_limit
|= ofile_open (files
, i_file
, n
);
1194 if (files
[i_file
].ofd
>= 0)
1195 closeout (files
[i_file
].ofile
, files
[i_file
].ofd
,
1196 files
[i_file
].opid
, files
[i_file
].of_name
);
1197 files
[i_file
].ofd
= OFD_APPEND
;
1200 IF_LINT (free (files
));
/* Diagnose an attempt to select a second split mode and exit via
   usage.  Wrapped in do/while (0) so the macro expands to a single
   statement and is safe as the body of an unbraced 'if'.  */
#define FAIL_ONLY_ONE_WAY()					\
  do								\
    {								\
      error (0, 0, _("cannot split in more than one way"));	\
      usage (EXIT_FAILURE);					\
    }								\
  while (0)
/* Parse K/N syntax of chunk options.
   SLASH points at the '/' inside optarg.  Store the number of chunks
   (the text after the slash) in *N_UNITS.  If any text precedes the
   slash, store the chunk number it denotes in *K_UNITS, which must be
   in the range 1..*N_UNITS; otherwise leave *K_UNITS untouched.
   Invalid numbers are diagnosed by xdectoumax.  */
static void
parse_chunk (uintmax_t *k_units, uintmax_t *n_units, char *slash)
{
  *n_units = xdectoumax (slash + 1, 1, UINTMAX_MAX, "",
                         _("invalid number of chunks"), 0);
  if (slash != optarg)           /* a leading number is specified.  */
    {
      /* Cut the string at the slash so only the K part is parsed;
         no suffix characters are accepted, so "K/N" would be rejected.  */
      *slash = '\0';
      *k_units = xdectoumax (optarg, 1, *n_units, "",
                             _("invalid chunk number"), 0);
    }
}
1229 main (int argc
, char **argv
)
1231 enum Split_type split_type
= type_undef
;
1232 size_t in_blk_size
= 0; /* optimal block size of input file device */
1233 size_t page_size
= getpagesize ();
1234 uintmax_t k_units
= 0;
1235 uintmax_t n_units
= 0;
1237 static char const multipliers
[] = "bEGKkMmPTYZ0";
1239 int digits_optind
= 0;
1240 off_t file_size
IF_LINT (= 0);
1242 initialize_main (&argc
, &argv
);
1243 set_program_name (argv
[0]);
1244 setlocale (LC_ALL
, "");
1245 bindtextdomain (PACKAGE
, LOCALEDIR
);
1246 textdomain (PACKAGE
);
1248 atexit (close_stdout
);
1250 /* Parse command line options. */
1252 infile
= bad_cast ("-");
1253 outbase
= bad_cast ("x");
1257 /* This is the argv-index of the option we will read next. */
1258 int this_optind
= optind
? optind
: 1;
1261 c
= getopt_long (argc
, argv
, "0123456789C:a:b:del:n:t:u",
1269 suffix_length
= xdectoumax (optarg
, 0, SIZE_MAX
/ sizeof (size_t),
1270 "", _("invalid suffix length"), 0);
1273 case ADDITIONAL_SUFFIX_OPTION
:
1274 if (last_component (optarg
) != optarg
)
1277 _("invalid suffix %s, contains directory separator"),
1279 usage (EXIT_FAILURE
);
1281 additional_suffix
= optarg
;
1285 if (split_type
!= type_undef
)
1286 FAIL_ONLY_ONE_WAY ();
1287 split_type
= type_bytes
;
/* Limit to OFF_T_MAX, because if input is a pipe, we could get more
   data than is possible to write to a single file, so indicate that
   immediately rather than possibly having future invocations fail.  */
1291 n_units
= xdectoumax (optarg
, 1, OFF_T_MAX
, multipliers
,
1292 _("invalid number of bytes"), 0);
1296 if (split_type
!= type_undef
)
1297 FAIL_ONLY_ONE_WAY ();
1298 split_type
= type_lines
;
1299 n_units
= xdectoumax (optarg
, 1, UINTMAX_MAX
, "",
1300 _("invalid number of lines"), 0);
1304 if (split_type
!= type_undef
)
1305 FAIL_ONLY_ONE_WAY ();
1306 split_type
= type_byteslines
;
1307 n_units
= xdectoumax (optarg
, 1, MIN (SIZE_MAX
, OFF_T_MAX
),
1308 multipliers
, _("invalid number of bytes"), 0);
1312 if (split_type
!= type_undef
)
1313 FAIL_ONLY_ONE_WAY ();
1314 /* skip any whitespace */
1315 while (isspace (to_uchar (*optarg
)))
1317 if (STRNCMP_LIT (optarg
, "r/") == 0)
1319 split_type
= type_rr
;
1322 else if (STRNCMP_LIT (optarg
, "l/") == 0)
1324 split_type
= type_chunk_lines
;
1328 split_type
= type_chunk_bytes
;
1329 if ((slash
= strchr (optarg
, '/')))
1330 parse_chunk (&k_units
, &n_units
, slash
);
1332 n_units
= xdectoumax (optarg
, 1, UINTMAX_MAX
, "",
1333 _("invalid number of chunks"), 0);
1342 char neweol
= optarg
[0];
1344 error (EXIT_FAILURE
, 0, _("empty record separator"));
1347 if (STREQ (optarg
, "\\0"))
/* Provoke with 'split -txx'.  Complain about
   "multi-character separator" instead of "multibyte separator", so
   that the diagnostic's wording does not need to be
   changed once multibyte characters are supported.  */
1355 error (EXIT_FAILURE
, 0, _("multi-character separator %s"),
1359 /* Make it explicit we don't support multiple separators. */
1360 if (0 <= eolchar
&& neweol
!= eolchar
)
1362 error (EXIT_FAILURE
, 0,
1363 _("multiple separator characters specified"));
1380 if (split_type
== type_undef
)
1382 split_type
= type_digits
;
1385 if (split_type
!= type_undef
&& split_type
!= type_digits
)
1386 FAIL_ONLY_ONE_WAY ();
1387 if (digits_optind
!= 0 && digits_optind
!= this_optind
)
1388 n_units
= 0; /* More than one number given; ignore other. */
1389 digits_optind
= this_optind
;
1390 if (!DECIMAL_DIGIT_ACCUMULATE (n_units
, c
- '0', uintmax_t))
1392 char buffer
[INT_BUFSIZE_BOUND (uintmax_t)];
1393 error (EXIT_FAILURE
, 0,
1394 _("line count option -%s%c... is too large"),
1395 umaxtostr (n_units
, buffer
), c
);
1400 suffix_alphabet
= "0123456789";
1403 if (strlen (optarg
) != strspn (optarg
, suffix_alphabet
))
1406 _("%s: invalid start value for numerical suffix"),
1408 usage (EXIT_FAILURE
);
1412 /* Skip any leading zero. */
1413 while (*optarg
== '0' && *(optarg
+ 1) != '\0')
1415 numeric_suffix_start
= optarg
;
1421 elide_empty_files
= true;
1425 filter_command
= optarg
;
1428 case IO_BLKSIZE_OPTION
:
1429 in_blk_size
= xdectoumax (optarg
, 1, SIZE_MAX
- page_size
,
1430 multipliers
, _("invalid IO block size"), 0);
1433 case VERBOSE_OPTION
:
1437 case_GETOPT_HELP_CHAR
;
1439 case_GETOPT_VERSION_CHAR (PROGRAM_NAME
, AUTHORS
);
1442 usage (EXIT_FAILURE
);
1446 if (k_units
!= 0 && filter_command
)
1448 error (0, 0, _("--filter does not process a chunk extracted to stdout"));
1449 usage (EXIT_FAILURE
);
1452 /* Handle default case. */
1453 if (split_type
== type_undef
)
1455 split_type
= type_lines
;
1461 error (0, 0, "%s: %s", _("invalid number of lines"), quote ("0"));
1462 usage (EXIT_FAILURE
);
1468 set_suffix_length (n_units
, split_type
);
1470 /* Get out the filename arguments. */
1473 infile
= argv
[optind
++];
1476 outbase
= argv
[optind
++];
1480 error (0, 0, _("extra operand %s"), quote (argv
[optind
]));
1481 usage (EXIT_FAILURE
);
1484 /* Check that the suffix length is large enough for the numerical
1485 suffix start value. */
1486 if (numeric_suffix_start
&& strlen (numeric_suffix_start
) > suffix_length
)
1488 error (0, 0, _("numerical suffix start value is too large "
1489 "for the suffix length"));
1490 usage (EXIT_FAILURE
);
1493 /* Open the input file. */
1494 if (! STREQ (infile
, "-")
1495 && fd_reopen (STDIN_FILENO
, infile
, O_RDONLY
, 0) < 0)
1496 error (EXIT_FAILURE
, errno
, _("cannot open %s for reading"),
1499 /* Binary I/O is safer when byte counts are used. */
1500 if (O_BINARY
&& ! isatty (STDIN_FILENO
))
1501 xfreopen (NULL
, "rb", stdin
);
1503 /* Get the optimal block size of input device and make a buffer. */
1505 if (fstat (STDIN_FILENO
, &in_stat_buf
) != 0)
1506 error (EXIT_FAILURE
, errno
, "%s", quotef (infile
));
1508 bool specified_buf_size
= !! in_blk_size
;
1509 if (! specified_buf_size
)
1510 in_blk_size
= io_blksize (in_stat_buf
);
1512 void *b
= xmalloc (in_blk_size
+ 1 + page_size
- 1);
1513 char *buf
= ptr_align (b
, page_size
);
1514 size_t initial_read
= SIZE_MAX
;
1516 if (split_type
== type_chunk_bytes
|| split_type
== type_chunk_lines
)
1518 off_t input_offset
= lseek (STDIN_FILENO
, 0, SEEK_CUR
);
1519 if (0 <= input_offset
)
1521 if (usable_st_size (&in_stat_buf
) && ! specified_buf_size
)
1523 assert (ST_BLKSIZE (in_stat_buf
) <= in_blk_size
);
1524 file_size
= input_file_size (STDIN_FILENO
, in_stat_buf
.st_size
,
1526 if (file_size
< in_blk_size
)
1527 initial_read
= file_size
;
1531 file_size
= lseek (STDIN_FILENO
, 0, SEEK_END
);
1532 input_offset
= (file_size
< 0
1534 : lseek (STDIN_FILENO
, input_offset
, SEEK_SET
));
1535 file_size
-= input_offset
;
1538 if (input_offset
< 0)
1539 error (EXIT_FAILURE
, 0, _("%s: cannot determine file size"),
1541 /* Overflow, and sanity checking. */
1542 if (OFF_T_MAX
< n_units
)
1544 char buffer
[INT_BUFSIZE_BOUND (uintmax_t)];
1545 error (EXIT_FAILURE
, EOVERFLOW
, "%s: %s",
1546 _("invalid number of chunks"),
1547 quote (umaxtostr (n_units
, buffer
)));
1549 /* Increase file_size to n_units, so that we still process
1550 any input data, and create empty files for the rest. */
1551 file_size
= MAX (file_size
, n_units
);
1554 /* When filtering, closure of one pipe must not terminate the process,
1555 as there may still be other streams expecting input from us. */
1558 struct sigaction act
;
1559 sigemptyset (&newblocked
);
1560 sigaction (SIGPIPE
, NULL
, &act
);
1561 if (act
.sa_handler
!= SIG_IGN
)
1562 sigaddset (&newblocked
, SIGPIPE
);
1563 sigprocmask (SIG_BLOCK
, &newblocked
, &oldblocked
);
1570 lines_split (n_units
, buf
, in_blk_size
);
1574 bytes_split (n_units
, buf
, in_blk_size
, SIZE_MAX
, 0);
1577 case type_byteslines
:
1578 line_bytes_split (n_units
, buf
, in_blk_size
);
1581 case type_chunk_bytes
:
1583 bytes_split (file_size
/ n_units
, buf
, in_blk_size
, initial_read
,
1586 bytes_chunk_extract (k_units
, n_units
, buf
, in_blk_size
, initial_read
,
1590 case type_chunk_lines
:
1591 lines_chunk_split (k_units
, n_units
, buf
, in_blk_size
, initial_read
,
1596 /* Note, this is like 'sed -n ${k}~${n}p' when k > 0,
1597 but the functionality is provided for symmetry. */
1598 lines_rr (k_units
, n_units
, buf
, in_blk_size
);
1607 if (close (STDIN_FILENO
) != 0)
1608 error (EXIT_FAILURE
, errno
, "%s", quotef (infile
));
1609 closeout (NULL
, output_desc
, filter_pid
, outfile
);
1611 return EXIT_SUCCESS
;