3 # combine-packs.sh -- combine Git pack files
4 # Copyright (C) 2016,2017,2018,2020 Kyle J. McKay.
7 # This program is free software: you can redistribute it and/or modify
8 # it under the terms of the GNU General Public License as published by
9 # the Free Software Foundation, either version 2 of the License, or
10 # (at your option) any later version.
12 # This program is distributed in the hope that it will be useful,
13 # but WITHOUT ANY WARRANTY; without even the implied warranty of
14 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 # GNU General Public License for more details.
17 # You should have received a copy of the GNU General Public License
18 # along with this program. If not, see <http://www.gnu.org/licenses/>.
23 printf '%s\n' path-to-pack[.idx|.pack] ... |
24 $(basename "$0") [option]... [pack-objects option]... [pack-base-name]
26 NOTE: The following options MUST be given before any pack-objects options:
28 --replace on success, remove the input packs, see note below
29 (but any input packs with a .keep are never removed)
31 --names output the 40-char hex sha1 plus '\n' to stdout for each
32 newly created pack(s), if any
34 --ignore-missing silently ignore input pack file names that do not exist
36 --ignore-missing-objects
37 silently ignore missing objects (explicit objects when
38 using --objects otherwise those contained in input packs)
40 --loose add the list of all currently existing loose objects in
41 the repository to the list of objects to pack
43 --objects input is a list of object hash id values instead of packs
45 --envok allow use of GIT_OBJECT_DIRECTORY otherwise it is an error
46 to run combine-packs.sh with GIT_OBJECT_DIRECTORY set
49 sort tags by object id rather than embedded tag name
50 using this option avoids using perl when tags are present
52 --weak-naming use perl to produce weaker object names (and likely larger
53 output packs) instead of naming with rev-list '--objects'
54 (this option requires tree objects contain 20-byte hashes)
56 If --replace is given, ALL packs to be combined MUST be located in
57 the objects/pack subdirectory of the current git directory AND the output
58 pack base MUST also be omitted (meaning it defaults to objects/pack/pack).
60 The --loose option can be used both with and without the --objects option. If
61 there are no currently existing loose objects in the repository's objects/
62 directory then it's effectively silently ignored.
64 Note that if --objects is used then --replace and --ignore-missing are invalid.
66 Unless --ignore-missing-objects is given, any input objects (either given
67 explicitly when using --objects otherwise those contained in the input packs)
68 that are not present in the current git directory (respecting the value of
69 GIT_OBJECT_DIRECTORY if --envok is given) or its alternate object
70 directories, if any, will cause combine-packs to fail.
71 With this option any such objects are SILENTLY SKIPPED and do NOT appear in
74 Unless the --sort-tags-by-id option is used then perl will be used if available
75 and any tag objects are present in the input. It provides the only efficient
76 way to extract the embedded tag name from a batch of tag objects reliably.
77 However, since the only reason the tag name is extracted is to sort the tag
78 objects for better tag deltification, if the tag objects are sorted by the
79 tag object id there is never any need to run perl. In practice, tag objects
80 rarely generate deltas and there are almost never enough tag objects in the
81 first place for the size savings of the almost-never-happens tag
82 deltification to matter anyway. This option will be activated automatically
83 if perl does not appear to be available.
85 Normally all commit and tree objects to be packed are 'named' using the git
86 rev-list --objects command so that the best possible pack(s) can be produced.
87 This requires that all tree objects referenced from commits and trees being
88 packed (recursively for trees) as well as all the blobs referenced by them
89 must be present in the repository or else the 'rev-list --objects' command
90 used to name them will fail. As an alternative the --weak-naming option will
91 avoid use of the '--objects' option and name the contents of tree objects
92 using a perl script. The resulting names are good, but not _as_ good which
93 may produce a less efficiently packed pack. It does, however, permit packing
94 completely arbitrarily selected objects without error.
96 A 40-char hex sha1 is taken to be objects/pack/pack-<sha-1>.idx relative to
97 the current git directory (as output by \`git rev-parse --git-dir\` or
98 by \`git rev-parse --git-common-dir\` for Git version 2.5 or later).
100 If a <pack-name> does not exist and contains no '/' characters then it is
101 retried as objects/pack/<pack-name> instead.
103 Packs to be combined MUST have an associated .idx file.
105 The pack-base-name may be a relative path name and if so, is ALWAYS relative
106 to the current git directory regardless of any GIT_OBJECT_DIRECTORY setting.
108 If not given, then the pack-base-name defaults to objects/pack/pack
109 relative to the current git directory.
111 If GIT_OBJECT_DIRECTORY is set to a non-default location (and the --envok flag
112 is given to allow it) then everywhere above where it says \"objects/\" is
113 effectively replaced with the full absolute path to \"\$GIT_OBJECT_DIRECTORY/\".
114 And, obviously, that location is no longer necessarily a subdirectory of the
115 current git directory either.
117 Note that --delta-base-offset is ALWAYS passed to git pack-objects but it is
118 the ONLY option that is automatically passed (but remember that --reuse-delta
119 and --reuse-object are IMPLIED and must be explicitly disabled if desired).
121 The options --revs, --unpacked, --all, --reflog, --indexed-objects and
122 --stdout are forbidden. Although --keep-true-parents is allowed it should
123 not have any effect at all. Using --incremental is recommended only for
124 wizards or with --objects as in most other cases it will result in an empty
125 pack being output. The combination of --loose --objects --incremental will
126 pack up all loose objects not already in a pack (and nothing else if standard
127 input is redirected to /dev/null in which case the --objects is optional).
129 WARNING: the move_aside logic currently only works when pack-base-name is
135 # $$ should be the same in subshells, but just in case, remember it
140 #line 140 "combine-packs.sh"
147 while ($count >= 32768) {
148 read(STDIN, $x, 32768);
151 read(STDIN, $x, $count) if $count;
157 if (/^([0-9a-fA-F]+) ([^ ]+) ([0-9]+)$/) {
158 my ($h, $t, $l) = ($1, $2, $3);
161 discard(1 + $l), next unless $2 eq "tag";
164 $count += length($_);
167 $tn = $1 if /^tag ([^ ]+)$/;
168 $te = $1 if /^tagger [^>]+> ([0-9]+)/;
171 discard(1 + $l - $count);
172 push(@tags, [$te, "$h $tn\n"]);
175 print map($$_[1], sort({$$b[0] <=> $$a[0]} @tags));
179 #line 179 "combine-packs.sh"
187 while ($count >= 32768) {
188 $len = read(STDIN, $x, 32768);
189 defined($len) && $len == 32768 or die "bad --batch output";
193 $len = read(STDIN, $x, $count);
194 defined($len) && $len == $count or die "bad --batch output";
201 if (/^([0-9a-fA-F]+) ([^ ]+) ([0-9]+)$/) {
202 my ($h, $t, $l) = ($1, $2, $3);
203 discard(1 + $l), next unless $2 eq "tree";
206 $count = read(STDIN, $tr, $l) if $l;
207 defined($count) && $count == $l or die "bad --batch output";
210 print $ln, " ", $h, " ~~~~ \n";
213 $pos < $l && ($loc = index($tr, "\0", $pos)) > $pos && $loc + 20 < $l;
214 $pos = $loc + 20 + 1) {
215 substr($tr, $pos, $loc - $pos) =~ /^([0-7]{5,6}) (.*)$/os or die "bad --batch output";
216 my ($mode, $name) = (oct($1), $2);
217 $mode == 0100644 || $mode == 040000 or next;
220 $r =~ s/[\t\n\013\f\r ]+//gos;
221 $r = substr(reverse($r), 0, 16);
222 my $i = unpack("H*", substr($tr, $loc + 1, 20));
225 print $ln, " ", $i, " ", $r, " ", $name, "\n";
227 print $ln, " ", $i, " ", $name, "\n";
230 $pos == $l or die "bad --batch output";
# Exit-time cleanup steps.
# NOTE(review): presumably the body of cleanup_on_exit (the handler installed
# by "trap cleanup_on_exit EXIT"); the enclosing function header is not part
# of this span -- confirm.

# Flag a failure unless a temp dir is set and contains the "success" marker
[ -n "$td" ] && [ -e "$td/success" ] || ewf=1
# Remove the temp dir if one is set and present; errors are ignored
[ -z "$td" ] || ! [ -e "$td" ] || rm -rf "$td" || :
# When both $gdop and $zap are set, delete every regular file named "*.$zap"
# found directly inside $gdop; errors are ignored
[ -z "$gdop" ] || [ -z "$zap" ] ||
	command find -L "$gdop" -maxdepth 1 -type f -name "*.$zap" -exec rm -f '{}' + || :
# Announce the failure on stderr, adding the last path component of $gd if set
[ -z "$ewf" ] || echo "combine-packs: exiting with failure${gd:+ (${gd##*/})}" >&2 || :
249 trap cleanup_on_exit EXIT
# Report the fatal error on stderr (a failed write is ignored)
echo "combine-packs: fatal: $*" >&2 || :
# In case we are in a sub shell force the entire command to exit
# The trap on TERM will make sure cleanup still happens in this case
# If the temp dir holds a non-empty "popid" file, also signal the pid it records
# NOTE(review): $extrapid is not reset within this span -- confirm it is
# cleared beforehand so a stale value cannot reach kill
[ -z "$td" ] || ! [ -s "$td/popid" ] || extrapid="$(cat "$td/popid")" || :
# Deliberately unquoted: either variable may be empty
kill $cp_pid $extrapid || :
# Remove any shell function named unalias, command or "$1", then drop all
# aliases; both steps discard their output and ignore failure.
# NOTE(review): presumably part of cmd_path (called as "cmd_path perl"
# elsewhere in the script); the function header is not in this span -- confirm.
"unset" -f unalias command "$1" >/dev/null 2>&1 || :
"unalias" -a >/dev/null 2>&1 || :
# This extra indirection should not be necessary, but some broken sh
# implementations need it so that a failure does not prematurely exit
# die_on_fail while set -e is active
274 # some shells do not handle "exec command ..." properly but just a
275 # plain "exec ..." has the same semantics so "command" is omitted here
# Failure path: create an empty "failed" marker in the temp dir (when one is
# set; errors ignored), then abort through die.
# $_ec is assigned outside this span (presumably the failed command's exit
# status -- confirm).
[ -z "$td" ] || >"$td/failed" || :
die "failed command ($_ec): $*"
# These commands may be the non-final member of a pipe and
# MUST NOT be allowed to silently fail without consequence
#
# Each wrapper shadows the utility of the same name and runs it through
# die_on_fail, passing every argument along unchanged.
awk() { die_on_fail awk "$@"; }
cat() { die_on_fail cat "$@"; }
cut() { die_on_fail cut "$@"; }
find() { die_on_fail find "$@"; }
git() { die_on_fail git "$@"; }
join() { die_on_fail join "$@"; }
perl() { die_on_fail perl "$@"; }
sed() { die_on_fail sed "$@"; }
sort() { die_on_fail sort "$@"; }
# Patterns built by repetition: one pair of $hexdig, then 4, 19 and 20 such
# pairs ($hexdig itself is assigned outside this span)
octet="$hexdig$hexdig"
octet4="$octet$octet$octet$octet"
octet19="$octet4$octet4$octet4$octet4$octet$octet$octet"
octet20="$octet4$octet4$octet4$octet4$octet4"
315 while [ $# -ge 1 ]; do case "$1" in
328 --ignore-missing-objects)
334 if [ -t 1 ] && pg
="$(git var GIT_PAGER 2>/dev/null)" && [ -n "$pg" ]; then
335 printf '%s' "${USAGE#?}" |
eval "$pg" ||
:
337 printf '%s' "${USAGE#?}" ||
:
# Reject $ignoremiss/$dozap together with $objectlist (presumably enforcing
# the usage note that --replace and --ignore-missing are invalid with
# --objects -- the option parsing that sets these is outside this span)
[ -z "$ignoremiss$dozap" ] || [ -z "$objectlist" ] || die "invalid options"

# Look up perl unless it is not needed ($noperl set and $weak unset); when the
# lookup fails fall back to running without perl
[ -n "$noperl" ] && [ -z "$weak" ] ||
	{ perlbin="$(cmd_path perl)" && [ -n "$perlbin" ]; } || noperl=1
[ -z "$weak" ] || [ -n "$perlbin" ] || die "--weak-naming requires perl"

# Always make sure we get the specified objects
GIT_NO_REPLACE_OBJECTS=1
export GIT_NO_REPLACE_OBJECTS
gd="$(git rev-parse --git-dir)" && [ -n "$gd" ] ||
	die "git rev-parse --git-dir failed"
# Strip the leading "git version " so only the version number remains
gv="$(git --version)"
gv="${gv#[Gg]it version }"
378 IFS
=.
read -r gvmaj gvmin gvpat
<<EOT
381 : "${gvmaj:=0}" "${gvmin:=0}" "${gvpat:=0}"
382 # git rev-parse added --no-walk support in 1.5.3 which is required
383 # git cat-file added --batch-check support in 1.5.6 which is required
384 if [ $gvmaj -lt 1 ] ||
{ [ $gvmaj -eq 1 ] && [ $gvmin -lt 5 ]; } ||
385 { [ $gvmaj -eq 1 ] && [ $gvmin -eq 5 ] && [ $gvpat -lt 6 ]; }; then
386 die
"combine-packs requires at least Git version 1.5.6"
389 gd
="$(cd "$gd" && pwd -P)" || die
"cd failed: $tmp"
390 # git rev-parse added --git-common-dir in 2.5
391 if [ $gvmaj -gt 2 ] ||
{ [ $gvmaj -eq 2 ] && [ $gvmin -ge 5 ]; }; then
392 # rev-parse --git-common-dir is broken and may give an
393 # incorrect result without a suitable current directory
395 gd
="$(cd "$gd" && cd "$
(git rev-parse
--git-common-dir)" && pwd -P)" &&
397 die
"git rev-parse --git-common-dir failed from: $tmp"
399 # gcfbf is Git Cat-File --Batch-check=Format Option :)
401 if [ $gvmaj -gt 1 ] ||
{ [ $gvmaj -eq 1 ] && [ $gvmin -gt 8 ]; } ||
402 { [ $gvmaj -eq 1 ] && [ $gvmin -eq 8 ] && [ $gvpat -ge 5 ]; }; then
403 gcfbf
='=%(objectname) %(objecttype)'
405 # gcfbo is Git Cat-File --Buffer Option :)
407 if [ $gvmaj -gt 2 ] ||
{ [ $gvmaj -eq 2 ] && [ $gvmin -ge 6 ]; }; then
410 if [ "${GIT_OBJECT_DIRECTORY+set}" = "set" ] && [ -z "$envok" ]; then
411 # GIT_OBJECT_DIRECTORY may only be set to $gd/objects without --envok
413 if [ -n "$GIT_OBJECT_DIRECTORY" ] && [ -d "$GIT_OBJECT_DIRECTORY" ] &&
414 [ -d "$gd/objects" ] && godfp
="$(cd "$GIT_OBJECT_DIRECTORY" && pwd -P)" &&
415 gdofp
="$(cd "$gd/objects
" && pwd -P)" && [ -n "$godfp" ] && [ -n "$gdofp" ] &&
416 [ "$gdofp" = "$godfp" ]; then
419 if [ -z "$godok" ]; then
420 die
"GIT_OBJECT_DIRECTORY set to non-default location without --envok"
# Object directory: GIT_OBJECT_DIRECTORY when set and non-empty, otherwise
# the objects directory under $gd
gdo="${GIT_OBJECT_DIRECTORY:-$gd/objects}"
# Canonicalize to a physical absolute path
# NOTE(review): the error message reports $tmp, which is assigned outside this
# span -- confirm it holds the pre-canonicalization value of $gdo
gdo="$(cd "$gdo" && pwd -P)" || die "cd failed: $tmp"
# The pack subdirectory must exist; canonicalize it the same way
[ -d "$gdo/pack" ] || die "no such directory: $gdo/pack"
gdop="$(cd "$gdo/pack" && pwd -P)" || die "cd failed: $gdo/pack"
439 --replace|
--names|
--ignore-missing|
-h|
--help|
--objects)
440 die
"invalid options"
442 --revs|
--unpacked|
--all|
--reflog|
--indexed-objects)
443 die
"forbidden pack-objects options"
450 nonopts
=$
(( $nonopts + 1 ))
453 if [ $# -gt 0 ] && [ $nonopts -gt 1 ] ||
454 { [ $nonopts -eq 1 ] && [ -n "$lastargopt" ]; } ||
455 { [ $nonopts -eq 1 ] && [ -z "$lastarg" ]; }; then
456 die
"invalid options"
458 if [ $nonopts -eq 1 ]; then
461 packbase
="$gdop/pack"
463 pbd
="$(dirname "$packbase")"
464 case "$pbd" in /*);;*)
467 [ -e "$pbd" ] && [ -d "$pbd" ] || die
"no such directory: $(dirname "$packbase")"
468 packbase
="$(cd "$pbd" && pwd -P)/$(basename "$packbase")"
469 pbd
="$(dirname "$packbase")"
470 [ -e "$pbd" ] && [ -d "$pbd" ] || die
"internal failure realpathing: $packbase"
471 packbasecheck
="$packbase"
472 case "$packbase" in "$gd"/?
*)
473 packbase
="${packbase#$gd/}"
475 [ $nonopts -eq 1 ] || packbasearg
="$packbase"
476 [ -z "$zap" ] ||
[ -n "$packbasearg" ] || die
"--replace does not allow specifying pack-base"
477 if [ -n "$zap" ] && [ "$(dirname "$packbasecheck")" != "$gdop" ] ; then
478 die
"--replace and pack base dir not <git-dir-objects>/pack" >&2
481 td
="$(mktemp -d "$gd/cmbnpcks-XXXXXX
")"
482 tdmin
="$(basename "$td")"
487 success
="$td/success"
494 trbl
="$tdmin/treesblobs"
496 named2
="$tdmin/named2"
502 _name
="$gdop/pack-$_name"
505 _name
="${_name%.idx}"
508 _name
="${_name%.pack}"
511 if ! [ -e "$_name.idx" ] && ! [ -e "$_name.pack" ]; then
512 case "$_name" in */*) :;; *)
516 if ! [ -f "$_name.idx" ] ||
! [ -s "$_name.idx" ] ||
517 ! [ -f "$_name.pack" ] ||
! [ -s "$_name.pack" ]; then
518 [ -z "$ignoremiss" ] ||
return 0
519 die
"no such pack found matching: $1" >&2
521 _name
="$(cd "$
(dirname "$_name")" && pwd -P)/$(basename "$_name")"
522 if ! [ -f "$_name.idx" ] ||
! [ -s "$_name.idx" ] ||
523 ! [ -f "$_name.pack" ] ||
! [ -s "$_name.pack" ]; then
524 die
"internal failure realpathing: $1" >&2
527 case "$(dirname "$_name")" in "$gd"/?
*)
528 _name
="${_name#$gd/}"
530 if [ -n "$zap" ] && [ "$(dirname "$_namecheck")" != "$gdop" ]; then
531 die
"--replace and pack not in <git-dir-objects>/pack: $1" >&2
539 _dupetmp
="$(mktemp "$
(dirname "$2")/packtmp-XXXXXX
")"
540 cp -fp "$1" "$_dupetmp"
541 mv -f "$_dupetmp" "$2"
# Add an "old" prefix to each existing file passed in, taking care to hard-link
# ALL of the files to their new names BEFORE removing anything
549 ln -f "$_f" "$(dirname "$_f")/old$(basename "$_f")" >/dev
/null
2>&1 ||
550 dupe_file
"$_f" "$(dirname "$_f")/old$(basename "$_f")"
553 if [ -f "$_f" ]; then
# Print one line per file found in the $octet-named subdirectories of $gdo
# whose name starts with $octet19: the directory name immediately followed by
# the file name (the "/" between them is removed).
# The body is parenthesized, so it runs in a subshell and the cd does not
# change the caller's working directory.
# Globals: gdo, octet, octet19 (all read)
# Returns: 1 if $gdo cannot be entered; 0 with no output when no entry of
#          $gdo matches $octet; otherwise the status of the find | sed pipe
list_loose_objects() (
	cd "$gdo" || return 1
	# Deliberately unquoted so the shell expands $octet against the entries of
	# $gdo; if the pattern comes back unchanged nothing matched
	objdirs="$(echo $octet)"
	[ "$objdirs" != "$octet" ] || return 0
	# $objdirs is deliberately unquoted: one find starting point per match
	find -L $objdirs -mindepth 1 -maxdepth 1 -type f -name "$octet19*" -print |
	sed 's,/,,'
)
574 if [ -n "$objectlist" ]; then
575 gcf
='git cat-file $gcfbo --batch-check"$gcfbf"'
576 [ -z "$looselist" ] || gcf
='{ list_loose_objects && cat; } | '"$gcf"
579 [ -z "$zap" ] ||
command find -L "$gdop" -maxdepth 1 -type f
-name "*.$zap" -exec rm -f '{}' + ||
:
581 [ -z "$looselist" ] || list_loose_objects
582 while IFS
=': ' read -r packraw junk
; do
583 pack
="$(cd "$origdir" && get_pack_base "$packraw" || die "no such pack
: $packraw")"
584 if [ -n "$pack" ]; then
585 [ -z "$zap" ] ||
[ -e "$pack.keep" ] ||
>"$pack.$zap"
586 git show-index
<"$pack.idx"
588 done | cut
-d ' ' -f 2
589 } | git cat-file
$gcfbo --batch-check"$gcfbf"
591 if ($2=="tree") print $1
592 else if ($2=="blob") print $1 >"'"$bl"'"
593 else if ($2=="commit") print $1 >"'"$cm"'"
594 else if ($2=="tag") print $1 >"'"$tg"'"
595 else if ($2=="missing") print $1 >"'"$ms"'"
597 [ -n "$missok" ] ||
! [ -s "$ms" ] || die
"missing" $
(wc -l <"$ms") "object(s)"
598 echo "g" |
cat "$tr" "$bl" - |
sort -u >"$trbl"
599 if [ -z "$weak" ]; then
600 git rev-list
--no-walk --objects --stdin <"$cm" |
602 if ($1!=$0) print NR " " $0
603 else print $0 >"'"$cmo"'"
606 join -t " " -1 2 - "$trbl" >"$named"
607 join -t " " -v 1 "$tr" "$named" |
608 git rev-list
--no-walk --objects --stdin |
609 awk '{print NR " " $0}' |
611 join -t " " -1 2 - "$trbl" >"$named2"
613 ! [ -s "$cm" ] || git rev-list
--no-walk --stdin <"$cm" >"$cmo"
614 git cat-file
$gcfbo --batch <"$tr" |
615 perl
-e "$perlnameprog" |
617 join -t " " -1 2 - "$trbl" >"$named"
619 pocmd
='git pack-objects --delta-base-offset "$@"'
620 [ -z "$packbasearg" ] || pocmd
="$pocmd \"${packbasearg}tmp\""
624 if [ -n "$noperl" ]; then
627 git cat-file
$gcfbo --batch <"$tg" | perl
-e "$perltagprog"
630 if [ -z "$weak" ]; then
632 join -t " " "$named" "$tr" |
634 join -t " " "$named2" "$tr" |
636 } |
sed -e 's/\([^ ][^ ]*\) [^ ][^ ]*/\1/'
638 join -t " " -v 1 "$named" "$tr" |
640 join -t " " -v 1 "$named2" "$tr" |
644 nm = substr($0, length($1) + length($2) + 3)
646 gsub(/[\t\n\013\f\r ]+/, "", sfx)
648 if (length(sfx) > 16) sfx = substr(sfx, length(sfx) - 15)
649 else if (length(sfx) < 16) sfx = sprintf("%16s", sfx)
651 r = c[16] c[15] c[14] c[13] c[12] c[11] c[10] c[9] c[8] c[7] c[6] c[5] c[4] c[3] c[2] c[1]
653 print NR " " $1 " " r " " nm
654 } else print NR " " $1 " " nm
655 } else print NR " " $1 " "
656 }' |
sort -t " " -k3,3 -k1,1n |
awk -F '[ ]' '{
658 nm = substr($0, length($1) + length($2) + length($3) + 4)
664 join -t " " "$named" "$tr" |
665 sort -t " " -k3,3 -k2,2n
666 join -t " " -v 1 "$named" "$tr" |
667 sort -t " " -k3,3 -k2,2n
670 nm = substr($0, length($1) + length($2) + length($3) + 4)
678 sh
-c 'echo $$ >"$1"; pocmd="$2"; shift; shift; eval "exec $pocmd"' sh
"$popid" "$pocmd" "$@" ||
{
680 die
"git pack-objects failed"
685 while read -r newpack
; do
686 if [ -n "$packbasearg" ]; then
687 move_aside
"$packbasearg"-$newpack.
*
688 ln -f "${packbasearg}tmp"-$newpack.pack
"$packbasearg"-$newpack.pack
689 ln -f "${packbasearg}tmp"-$newpack.idx
"$packbasearg"-$newpack.idx
690 rm -f "${packbasearg}tmp"-$newpack.
*
692 [ -z "$names" ] ||
echo "$newpack"
694 [ $?
-eq 0 ] && ! [ -e "$failed" ] && [ -e "$listok" ] && [ -e "$packok" ] ||
695 die
"unspecified failure"
696 if [ -n "$zap" ]; then
697 (cd "$gdo" && [ -d "pack" ] && find -L "pack" -maxdepth 1 -type f
-name "*.$zap" -print) |
698 while read -r remove
; do
699 rm -f "$gdo/${remove%.$zap}".
*