1 /* split.c -- split a file into pieces.
2 Copyright (C) 1988-2015 Free Software Foundation, Inc.
4 This program is free software: you can redistribute it and/or modify
5 it under the terms of the GNU General Public License as published by
6 the Free Software Foundation, either version 3 of the License, or
7 (at your option) any later version.
9 This program is distributed in the hope that it will be useful,
10 but WITHOUT ANY WARRANTY; without even the implied warranty of
11 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 GNU General Public License for more details.
14 You should have received a copy of the GNU General Public License
15 along with this program. If not, see <http://www.gnu.org/licenses/>. */
17 /* By tege@sics.se, with rms. TODO:
20 * support -p REGEX as in BSD's split.
21 * support --suppress-matched as in csplit. */
28 #include <sys/types.h>
33 #include "fd-reopen.h"
35 #include "full-write.h"
36 #include "ioblksize.h"
38 #include "safe-read.h"
41 #include "xdectoint.h"
44 /* The official name of this program (e.g., no 'g' prefix). */
45 #define PROGRAM_NAME "split"
48 proper_name_utf8 ("Torbjorn Granlund", "Torbj\303\266rn Granlund"), \
49 proper_name ("Richard M. Stallman")
51 /* Shell command to filter through, instead of creating files. */
52 static char const *filter_command
;
54 /* Process ID of the filter. */
55 static int filter_pid
;
57 /* Array of open pipes. */
58 static int *open_pipes
;
59 static size_t open_pipes_alloc
;
60 static size_t n_open_pipes
;
62 /* Blocked signals. */
63 static sigset_t oldblocked
;
64 static sigset_t newblocked
;
66 /* Base name of output files. */
67 static char const *outbase
;
69 /* Name of output files. */
72 /* Pointer to the end of the prefix in OUTFILE.
73 Suffixes are inserted here. */
74 static char *outfile_mid
;
76 /* Generate new suffix when suffixes are exhausted. */
77 static bool suffix_auto
= true;
79 /* Length of OUTFILE's suffix. */
80 static size_t suffix_length
;
82 /* Alphabet of characters to use in suffix. */
83 static char const *suffix_alphabet
= "abcdefghijklmnopqrstuvwxyz";
85 /* Numerical suffix start value. */
86 static const char *numeric_suffix_start
;
88 /* Additional suffix to append to output file names. */
89 static char const *additional_suffix
;
91 /* Name of input file. May be "-". */
94 /* stat buf for input file. */
95 static struct stat in_stat_buf
;
97 /* Descriptor on which output file is open. */
98 static int output_desc
= -1;
100 /* If true, print a diagnostic on standard error just before each
101 output file is opened. */
104 /* If true, don't generate zero length output files. */
105 static bool elide_empty_files
;
107 /* If true, in round robin mode, immediately copy
108 input to output, which is much slower, so disabled by default. */
109 static bool unbuffered
;
111 /* The character marking end of line. Defaults to \n below. */
112 static int eolchar
= -1;
114 /* The split mode to use. */
117 type_undef
, type_bytes
, type_byteslines
, type_lines
, type_digits
,
118 type_chunk_bytes
, type_chunk_lines
, type_rr
121 /* For long options that have no equivalent short option, use a
122 non-character as a pseudo short option, starting with CHAR_MAX + 1. */
125 VERBOSE_OPTION
= CHAR_MAX
+ 1,
128 ADDITIONAL_SUFFIX_OPTION
131 static struct option
const longopts
[] =
133 {"bytes", required_argument
, NULL
, 'b'},
134 {"lines", required_argument
, NULL
, 'l'},
135 {"line-bytes", required_argument
, NULL
, 'C'},
136 {"number", required_argument
, NULL
, 'n'},
137 {"elide-empty-files", no_argument
, NULL
, 'e'},
138 {"unbuffered", no_argument
, NULL
, 'u'},
139 {"suffix-length", required_argument
, NULL
, 'a'},
140 {"additional-suffix", required_argument
, NULL
,
141 ADDITIONAL_SUFFIX_OPTION
},
142 {"numeric-suffixes", optional_argument
, NULL
, 'd'},
143 {"filter", required_argument
, NULL
, FILTER_OPTION
},
144 {"verbose", no_argument
, NULL
, VERBOSE_OPTION
},
145 {"separator", required_argument
, NULL
, 't'},
146 {"-io-blksize", required_argument
, NULL
,
147 IO_BLKSIZE_OPTION
}, /* do not document */
148 {GETOPT_HELP_OPTION_DECL
},
149 {GETOPT_VERSION_OPTION_DECL
},
153 /* Return true if the errno value, ERR, is ignorable. */
157 return filter_command
&& err
== EPIPE
;
161 set_suffix_length (uintmax_t n_units
, enum Split_type split_type
)
163 #define DEFAULT_SUFFIX_LENGTH 2
165 uintmax_t suffix_needed
= 0;
167 /* The suffix auto length feature is incompatible with
168 a user specified start value as the generated suffixes
169 are not all consecutive. */
170 if (numeric_suffix_start
)
173 /* Auto-calculate the suffix length if the number of files is given. */
174 if (split_type
== type_chunk_bytes
|| split_type
== type_chunk_lines
175 || split_type
== type_rr
)
177 uintmax_t n_units_end
= n_units
;
178 if (numeric_suffix_start
)
181 strtol_error e
= xstrtoumax (numeric_suffix_start
, NULL
, 10,
183 if (e
== LONGINT_OK
&& n_start
<= UINTMAX_MAX
- n_units
)
185 /* Restrict auto adjustment so we don't keep
186 incrementing a suffix size arbitrarily,
187 as that would break sort order for files
188 generated from multiple split runs. */
189 if (n_start
< n_units
)
190 n_units_end
+= n_start
;
194 size_t alphabet_len
= strlen (suffix_alphabet
);
195 bool alphabet_slop
= (n_units_end
% alphabet_len
) != 0;
196 while (n_units_end
/= alphabet_len
)
198 suffix_needed
+= alphabet_slop
;
202 if (suffix_length
) /* set by user */
204 if (suffix_length
< suffix_needed
)
206 error (EXIT_FAILURE
, 0,
207 _("the suffix length needs to be at least %"PRIuMAX
),
214 suffix_length
= MAX (DEFAULT_SUFFIX_LENGTH
, suffix_needed
);
220 if (status
!= EXIT_SUCCESS
)
225 Usage: %s [OPTION]... [FILE [PREFIX]]\n\
229 Output pieces of FILE to PREFIXaa, PREFIXab, ...;\n\
230 default size is 1000 lines, and default PREFIX is 'x'.\n\
234 emit_mandatory_arg_note ();
236 fprintf (stdout
, _("\
237 -a, --suffix-length=N generate suffixes of length N (default %d)\n\
238 --additional-suffix=SUFFIX append an additional SUFFIX to file names\n\
239 -b, --bytes=SIZE put SIZE bytes per output file\n\
240 -C, --line-bytes=SIZE put at most SIZE bytes of records per output file\n\
241 -d use numeric suffixes starting at 0, not alphabetic\n\
242 --numeric-suffixes[=FROM] same as -d, but allow setting the start value\
244 -e, --elide-empty-files do not generate empty output files with '-n'\n\
245 --filter=COMMAND write to shell COMMAND; file name is $FILE\n\
246 -l, --lines=NUMBER put NUMBER lines/records per output file\n\
247 -n, --number=CHUNKS generate CHUNKS output files; see explanation below\n\
248 -t, --separator=SEP use SEP instead of newline as the record separator;\n\
249 '\\0' (zero) specifies the NUL character\n\
250 -u, --unbuffered immediately copy input to output with '-n r/...'\n\
251 "), DEFAULT_SUFFIX_LENGTH
);
253 --verbose print a diagnostic just before each\n\
254 output file is opened\n\
256 fputs (HELP_OPTION_DESCRIPTION
, stdout
);
257 fputs (VERSION_OPTION_DESCRIPTION
, stdout
);
261 N split into N files based on size of input\n\
262 K/N output Kth of N to stdout\n\
263 l/N split into N files without splitting lines/records\n\
264 l/K/N output Kth of N to stdout without splitting lines/records\n\
265 r/N like 'l' but use round robin distribution\n\
266 r/K/N likewise but only output Kth of N to stdout\n\
268 emit_ancillary_info (PROGRAM_NAME
);
273 /* Return the number of bytes that can be read from FD, a file with
274 apparent size SIZE. Actually read the data into BUF (of size
275 BUFSIZE) if the file appears to be smaller than BUFSIZE, as this
276 works better on proc-like file systems. If the returned value is
277 less than BUFSIZE, store all the file's data into BUF; otherwise,
278 restore the input file's position so that the file can be reread if needed. */
282 input_file_size (int fd
, off_t size
, char *buf
, size_t bufsize
)
289 size_t save
= size
< bufsize
? size
: 0;
290 size_t n_read
= safe_read (fd
, buf
+ save
, bufsize
- save
);
293 if (n_read
== SAFE_READ_ERROR
)
294 error (EXIT_FAILURE
, errno
, "%s", infile
);
297 if (bufsize
<= size
&& lseek (fd
, - size
, SEEK_CUR
) < 0)
298 error (EXIT_FAILURE
, errno
, "%s", infile
);
304 /* Compute the next sequential output file name and store it into the string 'outfile'. */
308 next_file_name (void)
310 /* Index in suffix_alphabet of each character in the suffix. */
311 static size_t *sufindex
;
312 static size_t outbase_length
;
313 static size_t outfile_length
;
314 static size_t addsuf_length
;
321 widen
= !! outfile_length
;
325 /* Allocate and initialize the first file name. */
327 outbase_length
= strlen (outbase
);
328 addsuf_length
= additional_suffix
? strlen (additional_suffix
) : 0;
329 outfile_length
= outbase_length
+ suffix_length
+ addsuf_length
;
333 /* Reallocate and initialize a new wider file name.
334 We do this by subsuming the unchanging part of
335 the generated suffix into the prefix (base), and
336 reinitializing the now one longer suffix. */
342 if (outfile_length
+ 1 < outbase_length
)
344 outfile
= xrealloc (outfile
, outfile_length
+ 1);
347 memcpy (outfile
, outbase
, outbase_length
);
350 /* Append the last alphabet character to the file name prefix. */
351 outfile
[outbase_length
] = suffix_alphabet
[sufindex
[0]];
355 outfile_mid
= outfile
+ outbase_length
;
356 memset (outfile_mid
, suffix_alphabet
[0], suffix_length
);
357 if (additional_suffix
)
358 memcpy (outfile_mid
+ suffix_length
, additional_suffix
, addsuf_length
);
359 outfile
[outfile_length
] = 0;
362 sufindex
= xcalloc (suffix_length
, sizeof *sufindex
);
364 if (numeric_suffix_start
)
368 /* Update the output file name. */
369 size_t i
= strlen (numeric_suffix_start
);
370 memcpy (outfile_mid
+ suffix_length
- i
, numeric_suffix_start
, i
);
372 /* Update the suffix index. */
373 size_t *sufindex_end
= sufindex
+ suffix_length
;
375 *--sufindex_end
= numeric_suffix_start
[i
] - '0';
378 #if ! _POSIX_NO_TRUNC && HAVE_PATHCONF && defined _PC_NAME_MAX
379 /* POSIX requires that if the output file name is too long for
380 its directory, 'split' must fail without creating any files.
381 This must be checked for explicitly on operating systems that
382 silently truncate file names. */
384 char *dir
= dir_name (outfile
);
385 long name_max
= pathconf (dir
, _PC_NAME_MAX
);
386 if (0 <= name_max
&& name_max
< base_len (last_component (outfile
)))
387 error (EXIT_FAILURE
, ENAMETOOLONG
, "%s", outfile
);
394 /* Increment the suffix in place, if possible. */
396 size_t i
= suffix_length
;
400 if (suffix_auto
&& i
== 0 && ! suffix_alphabet
[sufindex
[0] + 1])
402 outfile_mid
[i
] = suffix_alphabet
[sufindex
[i
]];
406 outfile_mid
[i
] = suffix_alphabet
[sufindex
[i
]];
408 error (EXIT_FAILURE
, 0, _("output file suffixes exhausted"));
412 /* Create or truncate a file. */
415 create (const char *name
)
420 fprintf (stdout
, _("creating file %s\n"), quote (name
));
422 int fd
= open (name
, O_WRONLY
| O_CREAT
| O_BINARY
, MODE_RW_UGO
);
425 struct stat out_stat_buf
;
426 if (fstat (fd
, &out_stat_buf
) != 0)
427 error (EXIT_FAILURE
, errno
, _("failed to stat %s"), quote (name
));
428 if (SAME_INODE (in_stat_buf
, out_stat_buf
))
429 error (EXIT_FAILURE
, 0, _("%s would overwrite input; aborting"),
431 if (ftruncate (fd
, 0) != 0)
432 error (EXIT_FAILURE
, errno
, _("%s: error truncating"), quote (name
));
440 char const *shell_prog
= getenv ("SHELL");
441 if (shell_prog
== NULL
)
442 shell_prog
= "/bin/sh";
443 if (setenv ("FILE", name
, 1) != 0)
444 error (EXIT_FAILURE
, errno
,
445 _("failed to set FILE environment variable"));
447 fprintf (stdout
, _("executing with FILE=%s\n"), quote (name
));
448 if (pipe (fd_pair
) != 0)
449 error (EXIT_FAILURE
, errno
, _("failed to create pipe"));
453 /* This is the child process. If an error occurs here, the
454 parent will eventually learn about it after doing a wait,
455 at which time it will emit its own error message. */
457 /* We have to close any pipes that were opened during an
458 earlier call, otherwise this process will be holding a
459 write-pipe that will prevent the earlier process from
460 reading an EOF on the corresponding read-pipe. */
461 for (j
= 0; j
< n_open_pipes
; ++j
)
462 if (close (open_pipes
[j
]) != 0)
463 error (EXIT_FAILURE
, errno
, _("closing prior pipe"));
464 if (close (fd_pair
[1]))
465 error (EXIT_FAILURE
, errno
, _("closing output pipe"));
466 if (fd_pair
[0] != STDIN_FILENO
)
468 if (dup2 (fd_pair
[0], STDIN_FILENO
) != STDIN_FILENO
)
469 error (EXIT_FAILURE
, errno
, _("moving input pipe"));
470 if (close (fd_pair
[0]) != 0)
471 error (EXIT_FAILURE
, errno
, _("closing input pipe"));
473 sigprocmask (SIG_SETMASK
, &oldblocked
, NULL
);
474 execl (shell_prog
, last_component (shell_prog
), "-c",
475 filter_command
, (char *) NULL
);
476 error (EXIT_FAILURE
, errno
, _("failed to run command: \"%s -c %s\""),
477 shell_prog
, filter_command
);
480 error (EXIT_FAILURE
, errno
, _("fork system call failed"));
481 if (close (fd_pair
[0]) != 0)
482 error (EXIT_FAILURE
, errno
, _("failed to close input pipe"));
483 filter_pid
= child_pid
;
484 if (n_open_pipes
== open_pipes_alloc
)
485 open_pipes
= x2nrealloc (open_pipes
, &open_pipes_alloc
,
487 open_pipes
[n_open_pipes
++] = fd_pair
[1];
492 /* Close the output file, and do any associated cleanup.
493 If FP and FD are both specified, they refer to the same open file;
494 in this case FP is closed, but FD is still used in cleanup. */
496 closeout (FILE *fp
, int fd
, pid_t pid
, char const *name
)
498 if (fp
!= NULL
&& fclose (fp
) != 0 && ! ignorable (errno
))
499 error (EXIT_FAILURE
, errno
, "%s", name
);
502 if (fp
== NULL
&& close (fd
) < 0)
503 error (EXIT_FAILURE
, errno
, "%s", name
);
505 for (j
= 0; j
< n_open_pipes
; ++j
)
507 if (open_pipes
[j
] == fd
)
509 open_pipes
[j
] = open_pipes
[--n_open_pipes
];
517 if (waitpid (pid
, &wstatus
, 0) == -1 && errno
!= ECHILD
)
518 error (EXIT_FAILURE
, errno
, _("waiting for child process"));
519 if (WIFSIGNALED (wstatus
))
521 int sig
= WTERMSIG (wstatus
);
524 char signame
[MAX (SIG2STR_MAX
, INT_BUFSIZE_BOUND (int))];
525 if (sig2str (sig
, signame
) != 0)
526 sprintf (signame
, "%d", sig
);
528 _("with FILE=%s, signal %s from command: %s"),
529 name
, signame
, filter_command
);
532 else if (WIFEXITED (wstatus
))
534 int ex
= WEXITSTATUS (wstatus
);
536 error (ex
, 0, _("with FILE=%s, exit %d from command: %s"),
537 name
, ex
, filter_command
);
541 /* shouldn't happen. */
542 error (EXIT_FAILURE
, 0,
543 _("unknown status from command (0x%X)"), wstatus
+ 0u);
548 /* Write BYTES bytes at BP to an output file.
549 If NEW_FILE_FLAG is true, open the next output file.
550 Otherwise add to the same output file already in use. */
553 cwrite (bool new_file_flag
, const char *bp
, size_t bytes
)
557 if (!bp
&& bytes
== 0 && elide_empty_files
)
559 closeout (NULL
, output_desc
, filter_pid
, outfile
);
561 if ((output_desc
= create (outfile
)) < 0)
562 error (EXIT_FAILURE
, errno
, "%s", outfile
);
564 if (full_write (output_desc
, bp
, bytes
) != bytes
&& ! ignorable (errno
))
565 error (EXIT_FAILURE
, errno
, "%s", outfile
);
568 /* Split into pieces of exactly N_BYTES bytes.
569 Use buffer BUF, whose size is BUFSIZE.
570 If INITIAL_READ != SIZE_MAX, input has already been read into BUF,
571 and BUF contains INITIAL_READ input bytes to use before reading more. */
574 bytes_split (uintmax_t n_bytes
, char *buf
, size_t bufsize
, size_t initial_read
,
578 bool new_file_flag
= true;
580 uintmax_t to_write
= n_bytes
;
582 uintmax_t opened
= 0;
586 if (initial_read
!= SIZE_MAX
)
588 n_read
= initial_read
;
589 initial_read
= SIZE_MAX
;
593 n_read
= safe_read (STDIN_FILENO
, buf
, bufsize
);
594 if (n_read
== SAFE_READ_ERROR
)
595 error (EXIT_FAILURE
, errno
, "%s", infile
);
601 if (to_read
< to_write
)
603 if (to_read
) /* do not write 0 bytes! */
605 cwrite (new_file_flag
, bp_out
, to_read
);
606 opened
+= new_file_flag
;
608 new_file_flag
= false;
615 cwrite (new_file_flag
, bp_out
, w
);
616 opened
+= new_file_flag
;
617 new_file_flag
= !max_files
|| (opened
< max_files
);
618 if (!new_file_flag
&& ignorable (errno
))
620 /* If filter no longer accepting input, stop reading. */
632 /* Ensure NUMBER files are created, which truncates
633 any existing files or notifies any consumers on fifos.
634 FIXME: Should we do this before EXIT_FAILURE? */
635 while (opened
++ < max_files
)
636 cwrite (true, NULL
, 0);
639 /* Split into pieces of exactly N_LINES lines.
640 Use buffer BUF, whose size is BUFSIZE. */
643 lines_split (uintmax_t n_lines
, char *buf
, size_t bufsize
)
646 char *bp
, *bp_out
, *eob
;
647 bool new_file_flag
= true;
652 n_read
= safe_read (STDIN_FILENO
, buf
, bufsize
);
653 if (n_read
== SAFE_READ_ERROR
)
654 error (EXIT_FAILURE
, errno
, "%s", infile
);
660 bp
= memchr (bp
, eolchar
, eob
- bp
+ 1);
663 if (eob
!= bp_out
) /* do not write 0 bytes! */
665 size_t len
= eob
- bp_out
;
666 cwrite (new_file_flag
, bp_out
, len
);
667 new_file_flag
= false;
675 cwrite (new_file_flag
, bp_out
, bp
- bp_out
);
677 new_file_flag
= true;
685 /* Split into pieces that are as large as possible while still not more
686 than N_BYTES bytes, and are split on line boundaries except
687 where lines longer than N_BYTES bytes occur. */
690 line_bytes_split (uintmax_t n_bytes
, char *buf
, size_t bufsize
)
693 uintmax_t n_out
= 0; /* for each split. */
695 char *hold
= NULL
; /* for lines > bufsize. */
696 size_t hold_size
= 0;
697 bool split_line
= false; /* Whether a \n was output in a split. */
701 n_read
= safe_read (STDIN_FILENO
, buf
, bufsize
);
702 if (n_read
== SAFE_READ_ERROR
)
703 error (EXIT_FAILURE
, errno
, "%s", infile
);
704 size_t n_left
= n_read
;
708 size_t split_rest
= 0;
712 /* Determine End Of Chunk and/or End of Line,
713 which are used below to select what to write or buffer. */
714 if (n_bytes
- n_out
- n_hold
<= n_left
)
716 /* Have enough for split. */
717 split_rest
= n_bytes
- n_out
- n_hold
;
718 eoc
= sob
+ split_rest
- 1;
719 eol
= memrchr (sob
, eolchar
, split_rest
);
722 eol
= memrchr (sob
, eolchar
, n_left
);
724 /* Output hold space if possible. */
725 if (n_hold
&& !(!eol
&& n_out
))
727 cwrite (n_out
== 0, hold
, n_hold
);
729 if (n_hold
> bufsize
)
730 hold
= xrealloc (hold
, bufsize
);
735 /* Output to eol if present. */
739 size_t n_write
= eol
- sob
+ 1;
740 cwrite (n_out
== 0, sob
, n_write
);
745 split_rest
-= n_write
;
748 /* Output to eoc or eob if possible. */
749 if (n_left
&& !split_line
)
751 size_t n_write
= eoc
? split_rest
: n_left
;
752 cwrite (n_out
== 0, sob
, n_write
);
757 split_rest
-= n_write
;
760 /* Update hold if needed. */
761 if ((eoc
&& split_rest
) || (!eoc
&& n_left
))
763 size_t n_buf
= eoc
? split_rest
: n_left
;
764 if (hold_size
- n_hold
< n_buf
)
766 if (hold_size
<= SIZE_MAX
- bufsize
)
767 hold_size
+= bufsize
;
770 hold
= xrealloc (hold
, hold_size
);
772 memcpy (hold
+ n_hold
, sob
, n_buf
);
778 /* Reset for new split. */
788 /* Handle no eol at end of file. */
790 cwrite (n_out
== 0, hold
, n_hold
);
795 /* -n l/[K/]N: Write lines to files of approximately file size / N.
796 The file is partitioned into file size / N sized portions, with the
797 last assigned any excess. If a line _starts_ within a partition
798 it is written completely to the corresponding file. Since lines
799 are not split even if they overlap a partition, the files written
800 can be larger or smaller than the partition size, and even empty
801 if a line is so long as to completely overlap the partition. */
804 lines_chunk_split (uintmax_t k
, uintmax_t n
, char *buf
, size_t bufsize
,
805 size_t initial_read
, off_t file_size
)
807 assert (n
&& k
<= n
&& n
<= file_size
);
809 const off_t chunk_size
= file_size
/ n
;
810 uintmax_t chunk_no
= 1;
811 off_t chunk_end
= chunk_size
- 1;
813 bool new_file_flag
= true;
814 bool chunk_truncated
= false;
818 /* Start reading 1 byte before kth chunk of file. */
819 off_t start
= (k
- 1) * chunk_size
- 1;
820 if (initial_read
!= SIZE_MAX
)
822 memmove (buf
, buf
+ start
, initial_read
- start
);
823 initial_read
-= start
;
825 else if (lseek (STDIN_FILENO
, start
, SEEK_CUR
) < 0)
826 error (EXIT_FAILURE
, errno
, "%s", infile
);
829 chunk_end
= chunk_no
* chunk_size
- 1;
832 while (n_written
< file_size
)
834 char *bp
= buf
, *eob
;
836 if (initial_read
!= SIZE_MAX
)
838 n_read
= initial_read
;
839 initial_read
= SIZE_MAX
;
843 n_read
= safe_read (STDIN_FILENO
, buf
, bufsize
);
844 if (n_read
== SAFE_READ_ERROR
)
845 error (EXIT_FAILURE
, errno
, "%s", infile
);
849 n_read
= MIN (n_read
, file_size
- n_written
);
850 chunk_truncated
= false;
858 /* Begin looking for '\n' at last byte of chunk. */
859 off_t skip
= MIN (n_read
, MAX (0, chunk_end
- n_written
));
860 char *bp_out
= memchr (bp
+ skip
, eolchar
, n_read
- skip
);
865 to_write
= bp_out
- bp
;
869 /* We don't use the stdout buffer here since we're writing
870 large chunks from an existing file, so it's more efficient
871 to write out directly. */
872 if (full_write (STDOUT_FILENO
, bp
, to_write
) != to_write
)
873 error (EXIT_FAILURE
, errno
, "%s", _("write error"));
876 cwrite (new_file_flag
, bp
, to_write
);
877 n_written
+= to_write
;
880 new_file_flag
= next
;
882 /* A line could have been so long that it skipped
883 entire chunks. So create empty files in that case. */
884 while (next
|| chunk_end
<= n_written
- 1)
886 if (!next
&& bp
== eob
)
888 /* replenish buf, before going to next chunk. */
889 chunk_truncated
= true;
893 if (k
&& chunk_no
> k
)
896 chunk_end
= file_size
- 1; /* >= chunk_size. */
898 chunk_end
+= chunk_size
;
899 if (chunk_end
<= n_written
- 1)
902 cwrite (true, NULL
, 0);
913 /* Ensure NUMBER files are created, which truncates
914 any existing files or notifies any consumers on fifos.
915 FIXME: Should we do this before EXIT_FAILURE? */
916 while (!k
&& chunk_no
++ <= n
)
917 cwrite (true, NULL
, 0);
920 /* -n K/N: Extract Kth of N chunks. */
923 bytes_chunk_extract (uintmax_t k
, uintmax_t n
, char *buf
, size_t bufsize
,
924 size_t initial_read
, off_t file_size
)
929 assert (k
&& n
&& k
<= n
&& n
<= file_size
);
931 start
= (k
- 1) * (file_size
/ n
);
932 end
= (k
== n
) ? file_size
: k
* (file_size
/ n
);
934 if (initial_read
!= SIZE_MAX
)
936 memmove (buf
, buf
+ start
, initial_read
- start
);
937 initial_read
-= start
;
939 else if (lseek (STDIN_FILENO
, start
, SEEK_CUR
) < 0)
940 error (EXIT_FAILURE
, errno
, "%s", infile
);
945 if (initial_read
!= SIZE_MAX
)
947 n_read
= initial_read
;
948 initial_read
= SIZE_MAX
;
952 n_read
= safe_read (STDIN_FILENO
, buf
, bufsize
);
953 if (n_read
== SAFE_READ_ERROR
)
954 error (EXIT_FAILURE
, errno
, "%s", infile
);
958 n_read
= MIN (n_read
, end
- start
);
959 if (full_write (STDOUT_FILENO
, buf
, n_read
) != n_read
960 && ! ignorable (errno
))
961 error (EXIT_FAILURE
, errno
, "%s", quote ("-"));
966 typedef struct of_info
980 /* Rotate file descriptors when we're writing to more output files than we
981 have available file descriptors.
982 Return whether we came under file resource pressure.
983 If so, it's probably best to close each file when finished with it. */
986 ofile_open (of_t
*files
, size_t i_check
, size_t nfiles
)
988 bool file_limit
= false;
990 if (files
[i_check
].ofd
<= OFD_NEW
)
993 size_t i_reopen
= i_check
? i_check
- 1 : nfiles
- 1;
995 /* Another process could have opened a file in between the calls to
996 close and open, so we should keep trying until open succeeds or
997 we've closed all of our files. */
1000 if (files
[i_check
].ofd
== OFD_NEW
)
1001 fd
= create (files
[i_check
].of_name
);
1002 else /* OFD_APPEND */
1004 /* Attempt to append to previously opened file.
1005 We use O_NONBLOCK to support writing to fifos,
1006 where the other end has closed because of our
1007 previous close. In that case we'll immediately
1008 get an error, rather than waiting indefinitely.
1009 In specialised cases the consumer can keep reading
1010 from the fifo, terminating on conditions in the data
1011 itself, or perhaps never in the case of 'tail -f'.
1012 I.e., for fifos it is valid to attempt this reopen.
1014 We don't handle the filter_command case here, as create()
1015 will exit if there are not enough files in that case.
1016 I.e., we don't support restarting filters, as that would
1017 put too much burden on users specifying --filter commands. */
1018 fd
= open (files
[i_check
].of_name
,
1019 O_WRONLY
| O_BINARY
| O_APPEND
| O_NONBLOCK
);
1025 if (!(errno
== EMFILE
|| errno
== ENFILE
))
1026 error (EXIT_FAILURE
, errno
, "%s", files
[i_check
].of_name
);
1030 /* Search backwards for an open file to close. */
1031 while (files
[i_reopen
].ofd
< 0)
1033 i_reopen
= i_reopen
? i_reopen
- 1 : nfiles
- 1;
1034 /* No more open files to close, exit with E[NM]FILE. */
1035 if (i_reopen
== i_check
)
1036 error (EXIT_FAILURE
, errno
, "%s", files
[i_check
].of_name
);
1039 if (fclose (files
[i_reopen
].ofile
) != 0)
1040 error (EXIT_FAILURE
, errno
, "%s", files
[i_reopen
].of_name
);
1041 files
[i_reopen
].ofile
= NULL
;
1042 files
[i_reopen
].ofd
= OFD_APPEND
;
1045 files
[i_check
].ofd
= fd
;
1046 if (!(files
[i_check
].ofile
= fdopen (fd
, "a")))
1047 error (EXIT_FAILURE
, errno
, "%s", files
[i_check
].of_name
);
1048 files
[i_check
].opid
= filter_pid
;
1055 /* -n r/[K/]N: Divide file into N chunks in round robin fashion.
1056 When K == 0, we try to keep the files open in parallel.
1057 If we run out of file resources, then we revert
1058 to opening and closing each file for each line. */
1061 lines_rr (uintmax_t k
, uintmax_t n
, char *buf
, size_t bufsize
)
1063 bool wrapped
= false;
1067 of_t
*files
IF_LINT (= NULL
);
1076 files
= xnmalloc (n
, sizeof *files
);
1078 /* Generate output file names. */
1079 for (i_file
= 0; i_file
< n
; i_file
++)
1082 files
[i_file
].of_name
= xstrdup (outfile
);
1083 files
[i_file
].ofd
= OFD_NEW
;
1084 files
[i_file
].ofile
= NULL
;
1085 files
[i_file
].opid
= 0;
1093 char *bp
= buf
, *eob
;
1094 size_t n_read
= safe_read (STDIN_FILENO
, buf
, bufsize
);
1095 if (n_read
== SAFE_READ_ERROR
)
1096 error (EXIT_FAILURE
, errno
, "%s", infile
);
1097 else if (n_read
== 0)
1106 /* Find end of line. */
1107 char *bp_out
= memchr (bp
, eolchar
, eob
- bp
);
1115 to_write
= bp_out
- bp
;
1119 if (line_no
== k
&& unbuffered
)
1121 if (full_write (STDOUT_FILENO
, bp
, to_write
) != to_write
)
1122 error (EXIT_FAILURE
, errno
, "%s", _("write error"));
1124 else if (line_no
== k
&& fwrite (bp
, to_write
, 1, stdout
) != 1)
1126 clearerr (stdout
); /* To silence close_stdout(). */
1127 error (EXIT_FAILURE
, errno
, "%s", _("write error"));
1130 line_no
= (line_no
== n
) ? 1 : line_no
+ 1;
1134 /* Secure file descriptor. */
1135 file_limit
|= ofile_open (files
, i_file
, n
);
1138 /* Note writing to fd, rather than flushing the FILE gives
1139 an 8% performance benefit, due to reduced data copying. */
1140 if (full_write (files
[i_file
].ofd
, bp
, to_write
) != to_write
1141 && ! ignorable (errno
))
1142 error (EXIT_FAILURE
, errno
, "%s", files
[i_file
].of_name
);
1144 else if (fwrite (bp
, to_write
, 1, files
[i_file
].ofile
) != 1
1145 && ! ignorable (errno
))
1146 error (EXIT_FAILURE
, errno
, "%s", files
[i_file
].of_name
);
1147 if (! ignorable (errno
))
1152 if (fclose (files
[i_file
].ofile
) != 0)
1153 error (EXIT_FAILURE
, errno
, "%s", files
[i_file
].of_name
);
1154 files
[i_file
].ofile
= NULL
;
1155 files
[i_file
].ofd
= OFD_APPEND
;
1157 if (next
&& ++i_file
== n
)
1160 /* If no filters are accepting input, stop reading. */
1173 /* Ensure all files created, so that any existing files are truncated,
1174 and to signal any waiting fifo consumers.
1175 Also, close any open file descriptors.
1176 FIXME: Should we do this before EXIT_FAILURE? */
1179 int ceiling
= (wrapped
? n
: i_file
);
1180 for (i_file
= 0; i_file
< n
; i_file
++)
1182 if (i_file
>= ceiling
&& !elide_empty_files
)
1183 file_limit
|= ofile_open (files
, i_file
, n
);
1184 if (files
[i_file
].ofd
>= 0)
1185 closeout (files
[i_file
].ofile
, files
[i_file
].ofd
,
1186 files
[i_file
].opid
, files
[i_file
].of_name
);
1187 files
[i_file
].ofd
= OFD_APPEND
;
1190 IF_LINT (free (files
));
1193 #define FAIL_ONLY_ONE_WAY() \
1196 error (0, 0, _("cannot split in more than one way")); \
1197 usage (EXIT_FAILURE); \
1202 /* Parse K/N syntax of chunk options. */
1205 parse_chunk (uintmax_t *k_units
, uintmax_t *n_units
, char *slash
)
1207 *n_units
= xdectoumax (slash
+ 1, 1, UINTMAX_MAX
, "",
1208 _("invalid number of chunks"), 0);
1209 if (slash
!= optarg
) /* a leading number is specified. */
1212 *k_units
= xdectoumax (optarg
, 1, *n_units
, "",
1213 _("invalid chunk number"), 0);
1219 main (int argc
, char **argv
)
1221 enum Split_type split_type
= type_undef
;
1222 size_t in_blk_size
= 0; /* optimal block size of input file device */
1223 size_t page_size
= getpagesize ();
1224 uintmax_t k_units
= 0;
1225 uintmax_t n_units
= 0;
1227 static char const multipliers
[] = "bEGKkMmPTYZ0";
1229 int digits_optind
= 0;
1230 off_t file_size
IF_LINT (= 0);
1232 initialize_main (&argc
, &argv
);
1233 set_program_name (argv
[0]);
1234 setlocale (LC_ALL
, "");
1235 bindtextdomain (PACKAGE
, LOCALEDIR
);
1236 textdomain (PACKAGE
);
1238 atexit (close_stdout
);
1240 /* Parse command line options. */
1242 infile
= bad_cast ("-");
1243 outbase
= bad_cast ("x");
1247 /* This is the argv-index of the option we will read next. */
1248 int this_optind
= optind
? optind
: 1;
1251 c
= getopt_long (argc
, argv
, "0123456789C:a:b:del:n:t:u",
1259 suffix_length
= xdectoumax (optarg
, 0, SIZE_MAX
/ sizeof (size_t),
1260 "", _("invalid suffix length"), 0);
1263 case ADDITIONAL_SUFFIX_OPTION
:
1264 if (last_component (optarg
) != optarg
)
1267 _("invalid suffix %s, contains directory separator"),
1269 usage (EXIT_FAILURE
);
1271 additional_suffix
= optarg
;
1275 if (split_type
!= type_undef
)
1276 FAIL_ONLY_ONE_WAY ();
1277 split_type
= type_bytes
;
1278 /* Limit to OFF_T_MAX, because if input is a pipe, we could get more
1279 data than is possible to write to a single file, so indicate that
1280 immediately rather than having possibly future invocations fail. */
1281 n_units
= xdectoumax (optarg
, 1, OFF_T_MAX
, multipliers
,
1282 _("invalid number of bytes"), 0);
1286 if (split_type
!= type_undef
)
1287 FAIL_ONLY_ONE_WAY ();
1288 split_type
= type_lines
;
1289 n_units
= xdectoumax (optarg
, 1, UINTMAX_MAX
, "",
1290 _("invalid number of lines"), 0);
1294 if (split_type
!= type_undef
)
1295 FAIL_ONLY_ONE_WAY ();
1296 split_type
= type_byteslines
;
1297 n_units
= xdectoumax (optarg
, 1, MIN (SIZE_MAX
, OFF_T_MAX
),
1298 multipliers
, _("invalid number of bytes"), 0);
1302 if (split_type
!= type_undef
)
1303 FAIL_ONLY_ONE_WAY ();
1304 /* skip any whitespace */
1305 while (isspace (to_uchar (*optarg
)))
1307 if (STRNCMP_LIT (optarg
, "r/") == 0)
1309 split_type
= type_rr
;
1312 else if (STRNCMP_LIT (optarg
, "l/") == 0)
1314 split_type
= type_chunk_lines
;
1318 split_type
= type_chunk_bytes
;
1319 if ((slash
= strchr (optarg
, '/')))
1320 parse_chunk (&k_units
, &n_units
, slash
);
1322 n_units
= xdectoumax (optarg
, 1, UINTMAX_MAX
, "",
1323 _("invalid number of chunks"), 0);
1332 char neweol
= optarg
[0];
1334 error (EXIT_FAILURE
, 0, _("empty record separator"));
1337 if (STREQ (optarg
, "\\0"))
1341 /* Provoke with 'split -txx'. Complain about
1342 "multi-character separator" instead of "multibyte separator", so
1343 that the diagnostic's wording does not need to be
1344 changed once multibyte characters are supported. */
1345 error (EXIT_FAILURE
, 0, _("multi-character separator %s"),
1349 /* Make it explicit we don't support multiple separators. */
1350 if (0 <= eolchar
&& neweol
!= eolchar
)
1352 error (EXIT_FAILURE
, 0,
1353 _("multiple separator characters specified"));
1370 if (split_type
== type_undef
)
1372 split_type
= type_digits
;
1375 if (split_type
!= type_undef
&& split_type
!= type_digits
)
1376 FAIL_ONLY_ONE_WAY ();
1377 if (digits_optind
!= 0 && digits_optind
!= this_optind
)
1378 n_units
= 0; /* More than one number given; ignore other. */
1379 digits_optind
= this_optind
;
1380 if (!DECIMAL_DIGIT_ACCUMULATE (n_units
, c
- '0', uintmax_t))
1382 char buffer
[INT_BUFSIZE_BOUND (uintmax_t)];
1383 error (EXIT_FAILURE
, 0,
1384 _("line count option -%s%c... is too large"),
1385 umaxtostr (n_units
, buffer
), c
);
1390 suffix_alphabet
= "0123456789";
1393 if (strlen (optarg
) != strspn (optarg
, suffix_alphabet
))
1396 _("%s: invalid start value for numerical suffix"),
1398 usage (EXIT_FAILURE
);
1402 /* Skip any leading zero. */
1403 while (*optarg
== '0' && *(optarg
+ 1) != '\0')
1405 numeric_suffix_start
= optarg
;
1411 elide_empty_files
= true;
1415 filter_command
= optarg
;
1418 case IO_BLKSIZE_OPTION
:
1419 in_blk_size
= xdectoumax (optarg
, 1, SIZE_MAX
- page_size
,
1420 multipliers
, _("invalid IO block size"), 0);
1423 case VERBOSE_OPTION
:
1427 case_GETOPT_HELP_CHAR
;
1429 case_GETOPT_VERSION_CHAR (PROGRAM_NAME
, AUTHORS
);
1432 usage (EXIT_FAILURE
);
1436 if (k_units
!= 0 && filter_command
)
1438 error (0, 0, _("--filter does not process a chunk extracted to stdout"));
1439 usage (EXIT_FAILURE
);
1442 /* Handle default case. */
1443 if (split_type
== type_undef
)
1445 split_type
= type_lines
;
1451 error (0, 0, "%s: %s", _("invalid number of lines"), quote ("0"));
1452 usage (EXIT_FAILURE
);
1458 set_suffix_length (n_units
, split_type
);
1460 /* Get out the filename arguments. */
1463 infile
= argv
[optind
++];
1466 outbase
= argv
[optind
++];
1470 error (0, 0, _("extra operand %s"), quote (argv
[optind
]));
1471 usage (EXIT_FAILURE
);
1474 /* Check that the suffix length is large enough for the numerical
1475 suffix start value. */
1476 if (numeric_suffix_start
&& strlen (numeric_suffix_start
) > suffix_length
)
1478 error (0, 0, _("numerical suffix start value is too large "
1479 "for the suffix length"));
1480 usage (EXIT_FAILURE
);
1483 /* Open the input file. */
1484 if (! STREQ (infile
, "-")
1485 && fd_reopen (STDIN_FILENO
, infile
, O_RDONLY
, 0) < 0)
1486 error (EXIT_FAILURE
, errno
, _("cannot open %s for reading"),
1489 /* Binary I/O is safer when byte counts are used. */
1490 if (O_BINARY
&& ! isatty (STDIN_FILENO
))
1491 xfreopen (NULL
, "rb", stdin
);
1493 /* Get the optimal block size of input device and make a buffer. */
1495 if (fstat (STDIN_FILENO
, &in_stat_buf
) != 0)
1496 error (EXIT_FAILURE
, errno
, "%s", infile
);
1498 bool specified_buf_size
= !! in_blk_size
;
1499 if (! specified_buf_size
)
1500 in_blk_size
= io_blksize (in_stat_buf
);
1502 void *b
= xmalloc (in_blk_size
+ 1 + page_size
- 1);
1503 char *buf
= ptr_align (b
, page_size
);
1504 size_t initial_read
= SIZE_MAX
;
1506 if (split_type
== type_chunk_bytes
|| split_type
== type_chunk_lines
)
1508 off_t input_offset
= lseek (STDIN_FILENO
, 0, SEEK_CUR
);
1509 if (0 <= input_offset
)
1511 if (usable_st_size (&in_stat_buf
) && ! specified_buf_size
)
1513 assert (ST_BLKSIZE (in_stat_buf
) <= in_blk_size
);
1514 file_size
= input_file_size (STDIN_FILENO
, in_stat_buf
.st_size
,
1516 if (file_size
< in_blk_size
)
1517 initial_read
= file_size
;
1521 file_size
= lseek (STDIN_FILENO
, 0, SEEK_END
);
1522 input_offset
= (file_size
< 0
1524 : lseek (STDIN_FILENO
, input_offset
, SEEK_SET
));
1525 file_size
-= input_offset
;
1528 if (input_offset
< 0)
1529 error (EXIT_FAILURE
, 0, _("%s: cannot determine file size"),
1531 /* Overflow, and sanity checking. */
1532 if (OFF_T_MAX
< n_units
)
1534 char buffer
[INT_BUFSIZE_BOUND (uintmax_t)];
1535 error (EXIT_FAILURE
, EOVERFLOW
, "%s: %s",
1536 _("invalid number of chunks"),
1537 quote (umaxtostr (n_units
, buffer
)));
1539 /* increase file_size to n_units here, so that we still process
1540 any input data, and create empty files for the rest. */
1541 file_size
= MAX (file_size
, n_units
);
1544 /* When filtering, closure of one pipe must not terminate the process,
1545 as there may still be other streams expecting input from us. */
1548 struct sigaction act
;
1549 sigemptyset (&newblocked
);
1550 sigaction (SIGPIPE
, NULL
, &act
);
1551 if (act
.sa_handler
!= SIG_IGN
)
1552 sigaddset (&newblocked
, SIGPIPE
);
1553 sigprocmask (SIG_BLOCK
, &newblocked
, &oldblocked
);
1560 lines_split (n_units
, buf
, in_blk_size
);
1564 bytes_split (n_units
, buf
, in_blk_size
, SIZE_MAX
, 0);
1567 case type_byteslines
:
1568 line_bytes_split (n_units
, buf
, in_blk_size
);
1571 case type_chunk_bytes
:
1573 bytes_split (file_size
/ n_units
, buf
, in_blk_size
, initial_read
,
1576 bytes_chunk_extract (k_units
, n_units
, buf
, in_blk_size
, initial_read
,
1580 case type_chunk_lines
:
1581 lines_chunk_split (k_units
, n_units
, buf
, in_blk_size
, initial_read
,
1586 /* Note, this is like 'sed -n ${k}~${n}p' when k > 0,
1587 but the functionality is provided for symmetry. */
1588 lines_rr (k_units
, n_units
, buf
, in_blk_size
);
1597 if (close (STDIN_FILENO
) != 0)
1598 error (EXIT_FAILURE
, errno
, "%s", infile
);
1599 closeout (NULL
, output_desc
, filter_pid
, outfile
);
1601 return EXIT_SUCCESS
;