modified: nfig1.py
[GalaxyCodeBases.git] / etc / split-tar
blob4862a04546a295dbf8205321f0460863bbb84a63
#!/bin/bash
# splits a large tar file into a set of smaller ones
#
# Author: Dr. Jürgen Vollmer <juergen.vollmer@informatik-vollmer.de>
# Copyright (C) 2003 Dr. Jürgen Vollmer, Karlsruhe, Germany
# For usage and license agreement, see below (function usage)
#
# Id: split-tar,v 1.30 2010/01/13 17:57:52 vollmer Exp $
# Version: 1.11 of 2006/06/02

#set -x

# Name this script was invoked as (directory part removed).  It prefixes
# every message and is part of the temporary file names.  "$0" is quoted
# so that an installation path containing blanks does not break basename.
CMD=$(basename -- "$0")
VERSION="1.11"
16 ###############################################################################
# Print the help text (synopsis, options, examples, license) on stdout and
# terminate the script.  The exit status is that of the final `cat`.
# Globals: CMD (read), DEFAULT_SIZE (read).  Both are expanded when the
#          function is called, so they may be assigned after this definition.
usage()
{
  cat <<END
usage: $CMD [options] tarfile.<suffix> (filename|directory)...

  Splits a large tar archive into a set of smaller ones.
  Creates a set of tar archives directly from the files and directories.

  <suffix> is one of tar, tar.gz, tar.xz, tgz, or tar.bz2
  Files are written to tarfile-???.<suffix> in the directory of
  tarfile.<suffix>, where ??? are three digits.
  Note: since a TAR file contains tar-specific administration information
        the resulting tar files may be larger than the specified size.
        For computation only the file size of the sources are used.

  Note: split-tar relies on the GNU version of "tar", "find" and "bash".
  Note: split-tar is not able to read the filenames from stdin.
        Use -T instead.

Options:
  -c       : Create the tar archives from [filename|directory...].
  -C opts  : Pass opts to tar, when creating the tarfile with -c
             the compression options -z (gzip), -J (xz) or -j (bzip2) are
             added by default, if the <suffix> indicates it.
  -e rate  : To compute the set of files to be put into a compressed
             tarfile, one has to estimate compressed size of each
             uncompressed source file. To do this a compression program
             indicated by the tarfile.<suffix> is called (e.g. gzip).
             This may be quite time consuming.

             This overhead may be avoided by giving a "compression rate"
             using the -e option. The real file-size of an uncompressed
             file is divided by that <rate>. This may result in
             too large or too small result tarfiles. So one has to do some
             trial and error to get the <rate> value right.

             The <rate> is a positive number.
  -f prog  : Use prog as "find" program, e.g.
             -f /usr/local/bin/gfind
  -N date  : Only store files newer than <date>.
             Typical format: YYYY-MM-DD or 'YYYY-MM-DD HH:MM:SS' or
             if <date> begins with \`/' or \`.', it is taken to be the name
             of a file whose last-modified time specifies the date.
             -N passes its argument as tar option \`--newer=<date>'
             (this may be changed in the source of this script, see
             variable TAR_NEWER).
             -N is valid only if -c is given.
  -h       : Help
  -s sizeK : Maximum size of one tar file in Kilo bytes, default ${DEFAULT_SIZE}
  -s sizeM : Size given in Mega Byte
  -s sizeG : Size given in Giga Byte
  -S       : Split the existing tar archive tarfile.<suffix>
             no [filename|directory...] may be given
             that's the default
  -t prog  : Use prog as "tar" program, e.g.
             -t /usr/local/bin/gtar
  -T file  : Read names to create the archive from <file>
  -v       : Verbose (verbose tar messages)
  -V       : Version.

Example:
  Splitting an already existing archive:
    If foo.tar.gz has a size of 3 M bytes, the command
       split-tar -s 1M foo.tar.gz
    will create the three tar.gz archives:
       foo-000.tar.gz
       foo-001.tar.gz
       foo-002.tar.gz
    which may be unpacked as usual:
       tar -xzvf foo-000.tar.gz
       tar -xzvf foo-001.tar.gz
       tar -xzvf foo-002.tar.gz
    and the result would be the same as if one unpacks the initial archive
       tar -xzvf foo.tar.gz

  Creating the archives directly from the sources:
       split-tar -e 5 -s 10M -c foo.tar.gz /home/foo
    will create tar archives:
       foo-000.tar.gz, .... foo-<n>.tar.gz
    containing foo's home directory. A compression rate of 5 is assumed
    for all not already compressed files.

Requirements:
  BASH, GNU-tar, and GNU-find.

Version:
  1.11 of 2006/06/02
  1.12 of 2013/11/16 by Galaxy

Author:
  Dr. Jürgen Vollmer <juergen.vollmer@informatik-vollmer.de>
  If you find this software useful, I would be glad to receive a postcard
  from you, showing the place where you're living.

Homepage:
  http://www.informatik-vollmer.de/software/split-tar.html

Copyright:
  (C) 2003 Dr. Jürgen Vollmer, Viktoriastrasse 15, D-76133 Karlsruhe, Germany

License:
  This program is free software; you can redistribute it and/or modify
  it under the terms of the GNU General Public License as published by
  the Free Software Foundation; either version 2 of the License, or
  any later version.

  This program is distributed in the hope that it will be useful,
  but WITHOUT ANY WARRANTY; without even the implied warranty of
  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
  GNU General Public License for more details.

  You should have received a copy of the GNU General Public License
  along with this program; if not, write to the Free Software
  Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA

Tips:
  split-tar -e 1 -s 1M -c tmp/tmptmp.tgz path/to/mp4/files
END
  exit
}
140 ###############################################################################
DEFAULT_SIZE=1024 # kbyte

# we need the GNU utilities!
# which tar program to use, may be changed with the -t option
TAR=tar

# which find program to use, may be changed with the -f option
FIND=find

# Directory holding all scratch files: $TMP when the caller has set it,
# /tmp otherwise (the assignment form keeps an explicitly empty TMP as is).
: "${TMP=/tmp}"

# file containing filenames
FILES=$TMP/$CMD.files.$$

# file containing the filename of a single file, too large for a single tar
FILE=$TMP/$CMD.file.$$

# where to untar the source tar file
TAR_DIR=$TMP/$CMD.dir.$$/

# file containing tar sources names
TAR_SOURCES=$TMP/$CMD.tarsources.$$
163 # remove temporary created files on exit
# remove temporary created files on exit
#
# Globals (read): OWN_TAR_SOURCES, TAR_SOURCES, FILES, FILE, TAR_DIR
# The list of sources is only removed when OWN_TAR_SOURCES is NO; with any
# other value (set by the -T option) the file belongs to the user.
# All paths are quoted and empty ones skipped, so a scratch directory with
# blanks in its name cannot make rm delete unrelated files.
exit_trap()
{
  local victim

  if [ "${OWN_TAR_SOURCES-}" = NO ] && [ -n "${TAR_SOURCES-}" ]; then
    rm -fr -- "$TAR_SOURCES"
  fi

  # TAR_DIR is deliberately emptied in create mode, hence the -n guard
  for victim in "${FILES-}" "${FILE-}" "${TAR_DIR-}"; do
    if [ -n "$victim" ]; then
      rm -fr -- "$victim"
    fi
  done
}
trap exit_trap EXIT
# tar-file count
COUNT=0

# NO : the list of sources is our own scratch file and is removed on exit
# YES: the list was supplied by the user (option -T) and must be kept
OWN_TAR_SOURCES=NO

# the GNU-tar option to conserve absolute filenames
# used only for the -c (create) mode, if the user gives an absolute
# path
# older tar versions may use:
# TAR_WITH_ABSOLUTE_NAMES=--absolute-paths
TAR_WITH_ABSOLUTE_NAMES=--absolute-names

# the GNU-tar option for storing files newer than DATE
# another possibility would be: --newer-mtime
TAR_NEWER=--newer

# argument of -e
COMPRESSION_RATE=
194 ##############################################################################
195 # emit an error message and terminate
196 ##############################################################################
# Write "<CMD>: error <message>" to stderr and terminate with status 1.
# Globals:   CMD (read)
# Arguments: the message; several arguments are joined with blanks
error()
{
  printf '%s: error %s\n' "$CMD" "$*" 1>&2
  exit 1
}
204 ##############################################################################
205 # create tar files
206 ##############################################################################
TAR_VERBOSE=

# Pack the members named in a list file into the next numbered archive
# <DEST_DIR>/<DEST_BASE>-<COUNT as 3 digits><SUFFIX> and report its size.
#
# Globals:   DEST_DIR, DEST_BASE, SUFFIX, DO_CREATE, TAR_DIR, TAR,
#            CREATE_OPTS, TAR_COMPRESS, TAR_VERBOSE (read)
#            COUNT (read, incremented by one)
# Arguments: $1 - file with one member name per line
# Outputs:   one "** create: ..." line on stdout
do_tar()
{
  local files=$1
  local dest
  local -a tar_args=()

  printf -v dest '%s/%s-%03d%s' "$DEST_DIR" "$DEST_BASE" "$COUNT" "$SUFFIX"
  touch -- "$dest" >/dev/null 2>&1 || error "can not create file $dest"

  # when splitting, member names are relative to the unpacked source archive
  if [ "$DO_CREATE" = NO ]; then
    tar_args+=(-C "$TAR_DIR")
  fi

  # CREATE_OPTS is a user supplied option *string* (-C opts) and is split
  # into words on purpose.
  # shellcheck disable=SC2206
  tar_args+=($CREATE_OPTS)

  if [ -n "$TAR_COMPRESS" ]; then
    tar_args+=("$TAR_COMPRESS")
  fi
  if [ -n "$TAR_VERBOSE" ]; then
    tar_args+=("$TAR_VERBOSE")
  fi

  # --no-recursion: directories are listed explicitly in the list file
  "$TAR" "${tar_args[@]}" \
         -c -f "$dest" --files-from="$files" --no-recursion
  COUNT=$((COUNT + 1))

  printf "** create: %s: size: %9d (bytes)\n" "$dest" "$(wc -c < "$dest")"
}
225 ##############################################################################
226 # emit all parts of a directory path name
227 ##############################################################################
# Print the given path and each of its parent directories, deepest first,
# one per line in the form "yyyyy <path>/".  Climbing stops as soon as the
# remaining path is empty, "." or "/"; those are not printed.
# Arguments: the path (several arguments are joined with blanks)
emit_dir_parts()
{
  local ff="$*"

  while [[ -n $ff && $ff != "." && $ff != "/" ]]; do
    printf 'yyyyy %s/\n' "$ff"
    # "--" keeps dirname from reading a leading "-" as an option
    ff=$(dirname -- "$ff")
  done
}
239 ##############################################################################
240 # emit all parts of a directory path name (sorted)
241 ##############################################################################
243 # LC_ALL=C to get the traditional sort order that uses native byte values.
# Print the given path and each of its parent directories, one per line
# without decoration, sorted byte-wise (LC_ALL=C) and without duplicates,
# so that a parent always precedes its children.  As in emit_dir_parts,
# an empty path, "." and "/" end the climb and are not printed.
# Arguments: the path (several arguments are joined with blanks)
emit_dir_parts_sorted()
{
  local ff="$*"

  while [[ -n $ff && $ff != "." && $ff != "/" ]]; do
    printf '%s\n' "$ff"
    # "--" keeps dirname from reading a leading "-" as an option
    ff=$(dirname -- "$ff")
  done | LC_ALL=C sort -u -s
}
257 ##############################################################################
258 # check options
259 ##############################################################################
# Translate the argument of -s into bytes and print the result.
# Accepted: one or more decimal digits, optionally followed by one of
# k/K (kilo), m/M (mega) or g/G (giga); without a letter kilo is assumed.
# Anything else is reported through error().
# Arguments: $1 - the size specification
_size_to_bytes()
{
  local spec=$1
  local digits unit factor

  if [[ ! $spec =~ ^([0-9]+)([kKmMgG]?)$ ]]; then
    error "-s expects a number followed by one optional character of KMG"
  fi
  digits=${BASH_REMATCH[1]}
  unit=${BASH_REMATCH[2]}

  case $unit in
    [mM]) factor=$((1024 * 1024)) ;;
    [gG]) factor=$((1024 * 1024 * 1024)) ;;
    *)    factor=1024 ;;
  esac

  # 10# forces base ten: "010" means ten, and "08" is not a broken octal
  printf '%s\n' "$((10#$digits * factor))"
}

DO_CREATE=NO
CREATE_OPTS=
MAX_SIZE=$((DEFAULT_SIZE * 1024))
TAR_NEWER_ARG=
while getopts cC:e:f:N:hs:St:T:vV opt "$@"
do
  case $opt in
    c) DO_CREATE=YES ;;
    C) CREATE_OPTS="$CREATE_OPTS $OPTARG" ;;
    e) # the rate is used as an integer divisor later on, so reject
       # everything that would make that division fail
       if [[ ! $OPTARG =~ ^[0-9]+$ ]] || ((10#$OPTARG == 0)); then
         error "-e expects a positive integer as compression rate"
       fi
       COMPRESSION_RATE=$((10#$OPTARG))
       ;;
    f) FIND=$OPTARG ;;
    N) TAR_NEWER_ARG=$OPTARG ;;
    S) DO_CREATE=NO ;;
    s) # error() inside $(...) only leaves the subshell, hence "|| exit"
       MAX_SIZE=$(_size_to_bytes "$OPTARG") || exit 1
       ;;
    t) TAR=$OPTARG ;;
    T) TAR_SOURCES=$OPTARG
       # the list belongs to the user: it must survive the exit trap
       OWN_TAR_SOURCES=YES
       [ -s "$TAR_SOURCES" ] ||
         error "-T expects a filename with files to get tar'ed in"
       ;;
    v) TAR_VERBOSE=-v ;;
    V) echo "$CMD $VERSION"
       exit
       ;;
    h|*) usage ;;
  esac
done
shift $((OPTIND - 1))
# check correct version of TAR and FIND

# Terminate with status 1 unless the output of "<prog> --version" contains
# "GNU <kind>".  The complaint goes to stderr, like all other diagnostics.
# Globals:   CMD (read)
# Arguments: $1 - program to probe, $2 - expected flavour ("tar" or "find")
_require_gnu()
{
  local prog=$1
  local kind=$2

  if ! "$prog" --version 2>&1 | grep "GNU $kind" > /dev/null; then
    echo "$CMD: sorry $prog is no GNU $kind" >&2
    exit 1
  fi
}

_require_gnu "$TAR" tar
_require_gnu "$FIND" find
# Consume the positional arguments left over after option parsing.
#   create mode (-c): <tarfile> followed by the sources, unless the sources
#                     came from -T, in which case only <tarfile> is required
#   split mode      : exactly one argument, the existing archive
if [ "$DO_CREATE" = YES ]
then
  if [ "$OWN_TAR_SOURCES" = YES ]
  then
    [ $# -ge 1 ] || error "expected at least one more argument, for more information: $CMD -h"
    TAR_FILE=$1; shift
  else
    [ $# -ge 2 ] || error "expected at least two arguments, for more information: $CMD -h"
    TAR_FILE=$1; shift
    # One source per line, written verbatim: printf with quoted arguments
    # keeps blanks and glob characters in the names intact.
    printf '%s\n' "$@" > "$TAR_SOURCES"
    shift $#
  fi
  # tar is always called with the "newer" option; the epoch selects everything
  [ -n "$TAR_NEWER_ARG" ] || TAR_NEWER_ARG="1970-01-01 00:00:00"
  # nothing gets unpacked in create mode
  TAR_DIR=
else
  [ $# -eq 1 ] || error "expected one argument, for more information: $CMD -h"
  TAR_FILE=$1
  [ -f "$TAR_FILE" ] || error "could not read $TAR_FILE"
  [ -z "$TAR_NEWER_ARG" ] || error "-N requires -c"
fi
# Derive the archive flavour from the file name of an archive.
# COMPRESS_CMD is used only to compute the estimated compressed size of a
# file, it is not used to actually do the compression. That is done via the
# TAR_COMPRESS tar command line option.
# Globals:   SUFFIX, COMPRESS_CMD, TAR_COMPRESS (written)
# Arguments: $1 - path of the archive; an unknown suffix ends in error()
_set_archive_type()
{
  case $(basename -- "$1") in
    *.tar.xz  ) SUFFIX=".tar.xz"
                COMPRESS_CMD="xz --stdout"
                TAR_COMPRESS=--xz ;;
    *.tar.bz2 ) SUFFIX=".tar.bz2"
                COMPRESS_CMD="bzip2 --stdout"
                TAR_COMPRESS=--bzip2 ;;
    *.tar.gz  ) SUFFIX=".tar.gz"
                COMPRESS_CMD="gzip --stdout --no-name"
                TAR_COMPRESS=--gzip ;;
    *.tgz     ) SUFFIX=".tgz"
                COMPRESS_CMD="gzip --stdout --no-name"
                TAR_COMPRESS=--gzip ;;
    *.tar     ) SUFFIX=".tar"
                COMPRESS_CMD=
                TAR_COMPRESS= ;;
    *         ) error "unknown suffix of $1" ;;
  esac
}

_set_archive_type "$TAR_FILE"
# name of the parts without directory and suffix, and where to put them
DEST_BASE=$(basename -- "$TAR_FILE" "$SUFFIX")
DEST_DIR=$(dirname -- "$TAR_FILE")
358 ##############################################################################
359 # do the job
360 ##############################################################################
# the size of the files to be tar'ed
cur_size=0

# Print $1 as it has to appear in a tar file list: relative to TAR_DIR.
# The prefix is removed literally (no regular expression) and only when the
# name really starts with it; in create mode TAR_DIR is empty and the name
# is printed unchanged.
_rel_name()
{
  printf '%s\n' "${1#"$TAR_DIR"}"
}

# Remove leftovers of an earlier run: scratch files and, in the directory
# the parts are written to, old parts of the same archive.
for stale in "$FILES" "$FILE" "$TAR_DIR"; do
  if [ -n "$stale" ]; then
    rm -fr -- "$stale"
  fi
done
rm -fr -- "${DEST_DIR:-.}/$DEST_BASE"-[0-9][0-9][0-9]"$SUFFIX"

# The pipeline below consists of a producer, which emits one line per
# archive member, and a consumer, which distributes them over the parts.
# Line formats:
#   "<size> <name>" : a regular file (or symbolic link when splitting)
#   "yyyyy <name>"  : a directory or other kind of file.
#                     We have to add directories in order to get the file
#                     permissions right.
#   "xxxx xxxx"     : we have seen all files, tar the remaining files
(
  if [ "$DO_CREATE" = NO ]
  then
    ##########################################################################
    # unpack the source tar archive
    ##########################################################################
    mkdir -p -- "$TAR_DIR" || error "can not create $TAR_DIR"
    "$TAR" -C "$TAR_DIR" -x ${TAR_COMPRESS:+"$TAR_COMPRESS"} -f "$TAR_FILE" ||
      error "can not un-tar $TAR_FILE"

    "$FIND" "$TAR_DIR" \( -type f -o -type l \) -a -printf "%s %p\n"
  else
    ##########################################################################
    # create new archive
    # Note: In order to get file-ownership correct, we have to tar all
    #       directories and parts of it found in any file-path to be added
    #       in the resulting archive. If we don't do that, we get for
    #       created (intermediate) directories the ownership of the
    #       extractor (e.g.).
    #       Therefore we call tar with the --no-recursion option.
    ##########################################################################
    # A verbose run writing to /dev/null is used only to obtain the list of
    # names tar would store.  TAR_WITH_ABSOLUTE_NAMES, CREATE_OPTS and
    # TAR_NEWER hold options and are split into words on purpose.
    # shellcheck disable=SC2086
    "$TAR" $TAR_WITH_ABSOLUTE_NAMES \
           $CREATE_OPTS \
           $TAR_NEWER "$TAR_NEWER_ARG" \
           --files-from="$TAR_SOURCES" \
           -cv -f /dev/null |
      while read -r f
      do
        if [ -f "$f" ]
        then
          wc -c -- "$f"
          # emit the parent directories once per run of files sharing them
          if [ "${f%/*}" != "$last_dir" ]
          then
            last_dir=$(dirname -- "$f")
            emit_dir_parts "$last_dir"
          fi
        elif [ -d "$f" ]
        then
          f=${f%/}
          emit_dir_parts "$f"
          last_dir=$f
        else
          printf 'yyyyy %s\n' "$f"
        fi
      done
  fi
) | ( LC_ALL=C sort -u -s -k2; echo "xxxx xxxx"; ) | ( sed -e 's|/$||' ) |
while read -r size name
do
  case $size in
    xxx* ) # end of input: pack whatever has been collected
           [ -f "$FILES" ] && do_tar "$FILES"
           ;;
    yyy* ) # The file name must be stored too :-)
           # but it will be compressed too
           # Add it in any case (ok if we have very bad luck and we're
           # saving a HUGE directory structure without any files
           # the resulting archive would be too large).
           size=$((${#name} / 4))
           cur_size=$((cur_size + size))
           _rel_name "$name" >> "$FILES"
           ;;
    *    ) if [ -n "$COMPRESS_CMD" ]
           then # compute estimate of compressed file size
             case "${name##*.}" in
               gz | zip | bzip | bzip2 | xz | rar ) ;; # already compressed
               * ) if [ -z "$COMPRESSION_RATE" ]
                   then
                     # COMPRESS_CMD is "program options", split on purpose
                     # shellcheck disable=SC2086
                     size=$($COMPRESS_CMD "$name" | wc -c)
                   else
                     size=$((size / $COMPRESSION_RATE))
                   fi
                   ;;
             esac
           fi
           # the file name must be stored too :-)
           # but it will be compressed too
           size=$((size + ${#name} / 4))
           if [ "$size" -ge "$MAX_SIZE" ]
           then # too large for any part: give it an archive of its own
             _rel_name "$name" > "$FILE"
             do_tar "$FILE"
           elif [ $((size + cur_size)) -ge "$MAX_SIZE" ]
           then
             do_tar "$FILES"
             cur_size=$size
             # start new tar archive, so we need to emit all
             # parts of the current files pathname (sorted)
             : > "$FILES"
             # The directories are derived from the *relative* name, so the
             # scratch directory and its parents never enter the list.
             rel=$(_rel_name "$name")
             dir_names=$(emit_dir_parts_sorted "$(dirname -- "$rel")")
             if [ -n "$dir_names" ]; then
               printf '%s\n' "$dir_names" >> "$FILES"
             fi
             printf '%s\n' "$rel" >> "$FILES"
           else
             cur_size=$((cur_size + size))
             _rel_name "$name" >> "$FILES"
           fi
           ;;
  esac
done
464 ##############################################################################
465 # T h e E n d
466 ##############################################################################
468 # Log: split-tar,v $
469 # Revision 1.30 2010/01/13 17:57:52 vollmer
470 # typoo
472 # Revision 1.29 2006/07/10 07:17:28 vollmer
473 # typoo
475 # Revision 1.28 2006/06/02 09:26:07 vollmer
476 # typoo
478 # Revision 1.27 2006/04/24 14:11:46 vollmer
479 # typoo
481 # Revision 1.26 2006/02/23 20:01:46 vollmer
482 # Now all directories get the correct time stamp.
483 # Sorting works as expected, even if non- 7-bit-ASCII letters are used
484 # by using LC_ALL=C.
485 # Thanks to one who wants to be unnamed for sending me the bug-fixes.
487 # Revision 1.25 2005/04/27 13:48:34 vollmer
488 # Add all intermediate directories of a path explicitly in order to get
489 # file/directory ownership correctly.
490 # Thanks to Tom Battisto <tbattist-AT-mailaka.net> for the bug report.
492 # Revision 1.24 2005/04/26 07:56:49 vollmer
493 # Directory permissions are set now correctly when unpacking the archives.
494 # Thanks to Tom Battisto <tbattist-AT-mailaka.net> for the bug report.
496 # Revision 1.23 2005/04/08 20:52:32 vollmer
497 # Added option -T, thanks to Juergen Kainz <jkainz-AT-transflow.com>
499 # Revision 1.21 2005/04/08 20:14:22 vollmer
500 # added option -e
502 # Revision 1.20 2004/07/23 21:30:15 vollmer
503 # - added -f and -t options to specify a FIND and TAR program.
505 # Revision 1.18 2003/11/06 16:24:13 vollmer
506 # - options passed by -C to tar will be passed now to the do_tar routine
507 # - \ as part of file names are allowed now
508 # Thanks to A. R.
510 # Revision 1.17 2003/11/03 16:42:10 vollmer
511 # - Added option -N
512 # - The created tar files are stored now in the given directory and not
513 # in the current one.
514 # Thanks to Martin Walter <martin.walter-AT-erol.at>, who found that bug and
515 # asked for -N
517 # Revision 1.16 2003/10/31 13:01:51 vollmer
518 # Creating a splitted tar file from directory works now for absolute
519 # path names of the directory
521 # Revision 1.15 2003/09/18 17:10:40 vollmer
522 # Filenames containing blanks are processed correctly if given on the
523 # command line.
524 # Thanks to Dr. Jim McCaa <jmccaa-AT-ucar.edu>, who gave me the fix.
526 # Revision 1.14 2003/08/18 07:28:16 vollmer
527 # The number followed -s must be followed now by k m or g
528 # (in order to make `expr' more portable)
530 # Revision 1.13 2003/08/12 07:56:38 vollmer
531 # added an Example
533 # Revision 1.12 2003/08/12 07:22:27 vollmer
534 # fixed a bug found by Willem Penninckx <willem.penninckx-AT-belgacom.net>:
535 # filenames may now contain blanks and * and other shell meta characters.
537 # Revision 1.11 2003/07/29 14:08:10 vollmer
538 # added the ability to create the tar archive directly from the sources
539 # (option -c)
541 # Revision 1.10 2003/07/29 13:01:50 vollmer
542 # -s accepts size specifier k,K,m,M,g or G
544 # Revision 1.9 2003/07/29 12:38:17 vollmer
545 # improved computing expected size computation
547 # Revision 1.8 2003/07/21 07:55:32 vollmer
548 # added --no-name option to the gzip COMPRESS_CMD
550 # Revision 1.7 2003/07/15 08:27:04 vollmer
551 # - added bzip2, thanks to Martin Deinhofer <martin.deinhofer-AT-gesig.at>
552 # - added length of file names when computing the size
554 # Revision 1.0 2003/07/02 14:57:17 vollmer
555 # Initial revision
556 ##############################################################################