3 # Copyright (c) 2011 The Chromium Authors. All rights reserved.
4 # Use of this source code is governed by a BSD-style license that can be
5 # found in the LICENSE file.
7 # usage: dirdiffer.sh old_dir new_dir patch_dir
9 # dirdiffer creates a patch directory patch_dir that represents the difference
10 # between old_dir and new_dir. patch_dir can be used with dirpatcher to
11 # recreate new_dir given old_dir.
13 # dirdiffer operates recursively, properly handling ordinary files, symbolic
14 # links, and directories, as they are found in new_dir. Symbolic links and
15 # directories are always replicated as-is in patch_dir. Ordinary files will
# be represented at the appropriate location in patch_dir by one of the
# following:
19 # - a binary diff prepared by goobsdiff that can transform the file at the
20 # same position in old_dir to the version in new_dir, but only when such a
21 # file already exists in old_dir and is an ordinary file. These files are
22 # given a "$gbs" suffix.
23 # - a bzip2-compressed copy of the new file from new_dir; in patch_dir, the
24 # new file will have a "$bz2" suffix.
25 # - a gzip-compressed copy of the new file from new_dir; in patch_dir, the
26 # new file will have a "$gz" suffix.
27 # - an xz/lzma2-compressed copy of the new file from new_dir; in patch_dir,
28 # the new file will have an "$xz" suffix.
29 # - an uncompressed copy of the new file from new_dir; in patch_dir, the
30 # new file will have a "$raw" suffix.
# The unconventional suffixes are used because they aren't likely to occur in
# filenames.
35 # Of these options, the smallest possible representation is chosen. Note that
36 # goobsdiff itself will also compress various sections of a binary diff with
37 # bzip2, gzip, or xz/lzma2, or leave them uncompressed, according to which is
38 # smallest. The approach of choosing the smallest possible representation is
39 # time-consuming but given the choices of compressors results in an overall
40 # size reduction of about 3%-5% relative to using bzip2 as the only
41 # compressor; bzip2 is generally more effective for these data sets than gzip,
42 # and xz/lzma2 more effective than bzip2.
44 # For large input files, goobsdiff is also very time-consuming and
45 # memory-intensive. The overall "wall clock time" spent preparing a patch_dir
46 # representing the differences between Google Chrome's 6.0.422.0 and 6.0.427.0
47 # versioned directories from successive weekly dev channel releases on a
48 # 2.53GHz dual-core 4GB MacBook Pro is 3 minutes. Reconstructing new_dir with
49 # dirpatcher is much quicker; in the above configuration, only 10 seconds are
50 # needed for reconstruction.
52 # After creating a full patch_dir structure, but before returning, dirpatcher
53 # is invoked to attempt to recreate new_dir in a temporary location given
54 # old_dir and patch_dir. The recreated new_dir is then compared against the
55 # original new_dir as a verification step. Should verification fail, dirdiffer
56 # exits with a nonzero status, and patch_dir should not be used.
# Environment variables:
# DIRDIFFER_EXCLUDE
#   When an entry in new_dir matches this regular expression, it will not be
#   included in patch_dir. All prospective paths in new_dir will be matched
#   against this regular expression, including directories. If a directory
#   matches this pattern, dirdiffer will also ignore the directory's contents.
# DIRDIFFER_NO_DIFF
#   When an entry in new_dir matches this regular expression, it will not be
#   represented in patch_dir by a $gbs file prepared by goobsdiff. It will only
#   appear as a $bz2, $gz, $xz, or $raw file. Only files in new_dir, not
#   directories, will be matched against this regular expression.
# Exit codes:
# 2 Incorrect number of parameters
74 # 3 Input directories do not exist or are not directories
75 # 4 Output directory already exists
76 # 5 Parent of output directory does not exist or is not a directory
# 6 An input or output directory contains another
78 # 7 Could not create output directory
79 # 8 File already exists in output directory
80 # 9 Found an irregular file (non-directory, file, or symbolic link) in input
81 # 10 Could not create symbolic link
83 # 12 bzip2 compression failed
84 # 13 gzip compression failed
85 # 14 xz/lzma2 compression failed
86 # 15 Patch creation failed
87 # 16 Verification failed
88 # 17 Could not set mode (permissions)
89 # 18 Could not set modification time
90 # 19 Invalid regular expression (irregular expression?)
94 # Environment sanitization. Set a known-safe PATH. Clear environment variables
95 # that might impact the interpreter's operation. The |bash -p| invocation
96 # on the #! line takes the bite out of BASH_ENV, ENV, and SHELLOPTS (among
97 # other features), but clearing them here ensures that they won't impact any
98 # shell scripts used as utility programs. SHELLOPTS is read-only and can't be
99 # unset, only unexported.
export PATH="/usr/bin:/bin:/usr/sbin:/sbin"
unset BASH_ENV CDPATH ENV GLOBIGNORE IFS POSIXLY_CORRECT
# SHELLOPTS is read-only, so it cannot be unset; drop its export attribute
# instead so that child shell scripts don't inherit this shell's options.
export -n SHELLOPTS

# dotglob makes "*" match names beginning with "."; nullglob makes a glob with
# no matches expand to nothing rather than to the literal pattern.
shopt -s dotglob nullglob
# find_tool looks for an executable file named |tool_name|:
#  - in the same directory as this script,
#  - if this script is located in a Chromium source tree, at the expected
#    Release output location in the Mac out directory,
#  - as above, but in the Debug output location
# If found in any of the above locations, the tool's path is output.
# Otherwise, this function outputs |tool_name| as a fallback, allowing it to
# be found (or not) by an ordinary ${PATH} search.
find_tool() {
  local tool_name="${1}"

  local script_dir
  script_dir="$(dirname "${0}")"

  # First choice: a sibling of this script.
  local tool="${script_dir}/${tool_name}"
  if [[ -f "${tool}" ]] && [[ -x "${tool}" ]]; then
    printf '%s\n' "${tool}"
    return
  fi

  # Resolve symbolic links so that the source-tree layout test below sees the
  # script's real location.
  local script_dir_phys
  script_dir_phys="$(cd "${script_dir}" && pwd -P)"
  if [[ "${script_dir_phys}" =~ ^(.*)/src/chrome/installer/mac$ ]]; then
    local out_dir="${BASH_REMATCH[1]}/src/out"
    local config
    # Release is preferred over Debug.
    for config in Release Debug; do
      tool="${out_dir}/${config}/${tool_name}"
      if [[ -f "${tool}" ]] && [[ -x "${tool}" ]]; then
        printf '%s\n' "${tool}"
        return
      fi
    done
  fi

  # Fallback: let the caller's ${PATH} search resolve the bare name.
  printf '%s\n' "${tool_name}"
}
# Name of this script, used as the prefix of diagnostics and of the temporary
# verification directory.
ME="$(basename "${0}")"
readonly ME
# dirpatcher is expected to live next to this script.
DIRPATCHER="$(dirname "${0}")/dirpatcher.sh"
readonly DIRPATCHER
GOOBSDIFF="$(find_tool goobsdiff)"
readonly GOOBSDIFF
readonly BZIP2="bzip2"
# gzip treats an exported GZIP environment variable as a list of default
# options, so make sure the tool name assigned here is never exported to it.
export -n GZIP
readonly GZIP="gzip"
readonly XZ="xz"
# Suffixes given to files in patch_dir. The "$" is literal, hence the single
# quotes.
readonly GBS_SUFFIX='$gbs'
readonly BZ2_SUFFIX='$bz2'
readonly GZ_SUFFIX='$gz'
readonly XZ_SUFFIX='$xz'
readonly PLAIN_SUFFIX='$raw'

# Workaround for http://code.google.com/p/chromium/issues/detail?id=83180#c3
# In bash 4.0, "declare VAR" no longer initializes VAR if not already set.
: "${DIRDIFFER_EXCLUDE:=}"
: "${DIRDIFFER_NO_DIFF:=}"
# err writes |error| to standard error, prefixed with this script's name
# (${ME}). It does not exit; callers choose the exit status.
err() {
  local error="${1-}"

  printf '%s\n' "${ME}: ${error}" >& 2
}
# g_cleanup lists paths to remove if the script exits early. g_verify_exclude
# lists the paths in new_dir that were excluded from patch_dir and so must be
# excluded from verification too.
declare -a g_cleanup g_verify_exclude

# cleanup is the handler installed by |trap cleanup EXIT HUP INT QUIT TERM|.
# It removes everything listed in g_cleanup and exits with the status that
# was current when the handler was entered.
cleanup() {
  # Must be first: any other command would overwrite ${?}.
  local status=${?}

  # Don't re-enter via the EXIT trap when this function exits, and ignore
  # further signals while removing files.
  trap - EXIT
  trap '' HUP INT QUIT TERM

  # The shell reports death-by-signal as 128 + the signal number.
  if [[ ${status} -ge 128 ]]; then
    err "Caught signal $((${status} - 128))"
  fi

  if [[ "${#g_cleanup[@]}" -gt 0 ]]; then
    rm -rf "${g_cleanup[@]}"
  fi

  exit ${status}
}
# copy_mode_and_time copies the permission bits, and for anything other than
# a symbolic link the modification time, from |new_file| to |patch_file|.
# Exits with status 17 if the mode can't be set and 18 if the modification
# time can't be set.
#
# Relies on BSD/macOS tool behavior: |stat -f| format strings and |chmod -h|.
copy_mode_and_time() {
  local new_file="${1}"
  local patch_file="${2}"

  # %OMp%OLp is the octal "middle" (setuid/setgid/sticky) and "low"
  # (user/group/other) portions of the mode, concatenated.
  local mode
  mode="$(stat "-f%OMp%OLp" "${new_file}")"
  # -h changes the mode of a symbolic link itself instead of its target.
  if ! chmod -h "${mode}" "${patch_file}"; then
    err "couldn't set mode of ${patch_file}"
    exit 17
  fi

  if ! [[ -L "${patch_file}" ]]; then
    # Symbolic link modification times can't be copied because there's no
    # shell tool that provides direct access to lutimes. Instead, the symbolic
    # link was created with rsync, which already copied the timestamp with
    # lutimes.
    if ! touch -r "${new_file}" "${patch_file}"; then
      err "couldn't set modification time of ${patch_file}"
      exit 18
    fi
  fi
}
# file_size outputs the size of |file| in bytes using BSD/macOS stat. It is
# defined here only when the rest of the file has not already provided it.
if ! declare -F file_size > /dev/null; then
  file_size() {
    local file="${1}"

    stat -f %z "${file}"
  }
fi

# make_patch_file represents the ordinary file |new_file| in the patch
# directory. It writes each candidate representation next to |patch_file|
# (raw copy, bzip2, gzip, xz, and, when |old_file| is an ordinary file and
# |new_file| does not match DIRDIFFER_NO_DIFF, a goobsdiff binary diff), keeps
# only the smallest, and copies |new_file|'s mode and modification time to it.
# A candidate replaces the current best only when it is strictly smaller.
#
# Exit statuses: 8 if a candidate's output file already exists, 12/13/14 if
# bzip2/gzip/xz fails, 15 if goobsdiff fails. A failed raw copy exits with 11,
# the one status absent from the documented list - TODO confirm.
make_patch_file() {
  local old_file="${1}"
  local new_file="${2}"
  local patch_file="${3}"

  # Baseline candidate: an uncompressed copy.
  local uncompressed_file="${patch_file}${PLAIN_SUFFIX}"
  if ! cp "${new_file}" "${uncompressed_file}"; then
    err "couldn't copy ${new_file} to ${uncompressed_file}"
    exit 11
  fi
  local uncompressed_size
  uncompressed_size="$(file_size "${new_file}")"

  local keep_file="${uncompressed_file}"
  local keep_size="${uncompressed_size}"

  # bzip2 candidate.
  local bz2_file="${patch_file}${BZ2_SUFFIX}"
  if [[ -e "${bz2_file}" ]]; then
    err "${bz2_file} already exists"
    exit 8
  fi
  if ! "${BZIP2}" -9c < "${new_file}" > "${bz2_file}"; then
    err "couldn't compress ${new_file} to ${bz2_file} with ${BZIP2}"
    exit 12
  fi
  local bz2_size
  bz2_size="$(file_size "${bz2_file}")"

  if [[ "${bz2_size}" -ge "${keep_size}" ]]; then
    rm -f "${bz2_file}"
  else
    rm -f "${keep_file}"
    keep_file="${bz2_file}"
    keep_size="${bz2_size}"
  fi

  # gzip candidate. -n omits the original name and timestamp from the header.
  local gz_file="${patch_file}${GZ_SUFFIX}"
  if [[ -e "${gz_file}" ]]; then
    err "${gz_file} already exists"
    exit 8
  fi
  if ! "${GZIP}" -9cn < "${new_file}" > "${gz_file}"; then
    err "couldn't compress ${new_file} to ${gz_file} with ${GZIP}"
    exit 13
  fi
  local gz_size
  gz_size="$(file_size "${gz_file}")"

  if [[ "${gz_size}" -ge "${keep_size}" ]]; then
    rm -f "${gz_file}"
  else
    rm -f "${keep_file}"
    keep_file="${gz_file}"
    keep_size="${gz_size}"
  fi

  # xz candidate.
  local xz_flags=("-c")

  # If the file looks like a Mach-O file, including a universal/fat file, add
  # the x86 BCJ filter, which results in slightly better compression of x86
  # and x86_64 executables. Mach-O files might contain other architectures,
  # but they aren't currently expected in Chrome.
  local file_output
  file_output="$(file "${new_file}" 2> /dev/null || true)"
  if [[ "${file_output}" =~ Mach-O ]]; then
    xz_flags+=("--x86")
  fi

  # Use an lzma2 encoder. This is equivalent to xz -9 -e, but allows filters
  # to precede the compressor.
  xz_flags+=("--lzma2=preset=9e")

  local xz_file="${patch_file}${XZ_SUFFIX}"
  if [[ -e "${xz_file}" ]]; then
    err "${xz_file} already exists"
    exit 8
  fi
  if ! "${XZ}" "${xz_flags[@]}" < "${new_file}" > "${xz_file}"; then
    err "couldn't compress ${new_file} to ${xz_file} with ${XZ}"
    exit 14
  fi
  local xz_size
  xz_size="$(file_size "${xz_file}")"

  if [[ "${xz_size}" -ge "${keep_size}" ]]; then
    rm -f "${xz_file}"
  else
    rm -f "${keep_file}"
    keep_file="${xz_file}"
    keep_size="${xz_size}"
  fi

  # Binary diff candidate. An empty DIRDIFFER_NO_DIFF must mean "nothing is
  # exempt from diffing": test for emptiness explicitly, because some regcomp
  # implementations accept an empty pattern and match everything with it.
  local no_diff_re="${DIRDIFFER_NO_DIFF:-}"
  if [[ -f "${old_file}" ]] && ! [[ -L "${old_file}" ]] &&
     { [[ -z "${no_diff_re}" ]] ||
       ! [[ "${new_file}" =~ ${no_diff_re} ]]; }; then
    local gbs_file="${patch_file}${GBS_SUFFIX}"
    if [[ -e "${gbs_file}" ]]; then
      err "${gbs_file} already exists"
      exit 8
    fi
    if ! "${GOOBSDIFF}" "${old_file}" "${new_file}" "${gbs_file}"; then
      err "couldn't create ${gbs_file} by comparing ${old_file} to ${new_file}"
      exit 15
    fi
    local gbs_size
    gbs_size="$(file_size "${gbs_file}")"

    if [[ "${gbs_size}" -ge "${keep_size}" ]]; then
      rm -f "${gbs_file}"
    else
      rm -f "${keep_file}"
      keep_file="${gbs_file}"
      keep_size="${gbs_size}"
    fi
  fi

  copy_mode_and_time "${new_file}" "${keep_file}"
}
# make_patch_symlink replicates the symbolic link |new_file| at |patch_file|
# and then copies its mode. Exits with status 10 if the link can't be created.
make_patch_symlink() {
  local new_file="${1}"
  local patch_file="${2}"

  # The obvious implementation would be:
  #   target="$(readlink "${new_file}")"
  #   ln -s "${target}" "${patch_file}"

  # Use rsync instead of the above, as it's the only way to preserve the
  # timestamp of a symbolic link using shell tools. -l copies the link as a
  # link; -t preserves its modification time.
  if ! rsync -lt "${new_file}" "${patch_file}"; then
    err "couldn't create symbolic link ${patch_file}"
    exit 10
  fi

  copy_mode_and_time "${new_file}" "${patch_file}"
}
# make_patch_dir creates |patch_dir| and populates it, recursively, with a
# representation of every entry in |new_dir|: symbolic links through
# make_patch_symlink, directories through make_patch_dir, and ordinary files
# through make_patch_file (which may diff against the entry at the same
# position in |old_dir|). Entries matching DIRDIFFER_EXCLUDE are skipped and
# recorded in g_verify_exclude. Finally |new_dir|'s mode and modification time
# are copied to |patch_dir|.
#
# Exit statuses: 7 if |patch_dir| can't be created, 8 if an entry already
# exists in |patch_dir|, 9 if |new_dir| contains something other than a
# directory, ordinary file, or symbolic link.
make_patch_dir() {
  local old_dir="${1}"
  local new_dir="${2}"
  local patch_dir="${3}"

  if ! mkdir "${patch_dir}"; then
    err "couldn't create ${patch_dir}"
    exit 7
  fi

  local exclude_re="${DIRDIFFER_EXCLUDE:-}"

  local new_file
  for new_file in "${new_dir}/"*; do
    # Entry name relative to new_dir: drop "${new_dir}/".
    local file="${new_file:${#new_dir} + 1}"
    local old_file="${old_dir}/${file}"
    local patch_file="${patch_dir}/${file}"

    # An empty pattern must exclude nothing: test for emptiness explicitly,
    # because some regcomp implementations accept an empty pattern and match
    # everything with it.
    if [[ -n "${exclude_re}" ]] &&
       [[ "${new_file}" =~ ${exclude_re} ]]; then
      g_verify_exclude+=("${new_file}")
      continue
    fi

    if [[ -e "${patch_file}" ]]; then
      err "${patch_file} already exists"
      exit 8
    fi

    # Test for a symbolic link first: -d and -f follow links.
    if [[ -L "${new_file}" ]]; then
      make_patch_symlink "${new_file}" "${patch_file}"
    elif [[ -d "${new_file}" ]]; then
      make_patch_dir "${old_file}" "${new_file}" "${patch_file}"
    elif [[ ! -f "${new_file}" ]]; then
      err "can't handle irregular file ${new_file}"
      exit 9
    else
      make_patch_file "${old_file}" "${new_file}" "${patch_file}"
    fi
  done

  # Done last, after the directory has been populated, since adding entries
  # changes the directory's modification time.
  copy_mode_and_time "${new_dir}" "${patch_dir}"
}
# verify_patch_dir checks that |patch_dir| is usable: it runs dirpatcher on
# |old_dir| and |patch_dir| to build a directory in a temporary location and
# compares the result against |new_dir| with rsync, ignoring the entries
# recorded in g_verify_exclude. Exits with status 16 if any step fails or if
# a difference is found.
verify_patch_dir() {
  local old_dir="${1}"
  local new_dir="${2}"
  local patch_dir="${3}"

  local verify_temp_dir verify_dir
  if ! verify_temp_dir="$(mktemp -d -t "${ME}")" ||
     [[ -z "${verify_temp_dir}" ]]; then
    err "couldn't create a temporary directory for verification"
    exit 16
  fi
  g_cleanup+=("${verify_temp_dir}")
  verify_dir="${verify_temp_dir}/patched"

  if ! "${DIRPATCHER}" "${old_dir}" "${patch_dir}" "${verify_dir}"; then
    err "patch application for verification failed"
    exit 16
  fi

  # rsync will print a line for any file, directory, or symbolic link that
  # differs or exists only in one directory. As used here, it correctly
  # considers link targets, file contents, permissions, and timestamps.
  local rsync_command=(rsync -clprt --delete --out-format=%n
                       "${new_dir}/" "${verify_dir}")
  if [[ ${#g_verify_exclude[@]} -gt 0 ]]; then
    local exclude
    for exclude in "${g_verify_exclude[@]}"; do
      # ${g_verify_exclude[@]} contains paths in ${new_dir}. Strip off
      # ${new_dir} from the beginning of each, but leave a leading "/" so that
      # rsync treats them as being at the root of the "transfer."
      rsync_command+=("--exclude" "${exclude:${#new_dir}}")
    done
  fi

  local rsync_output
  if ! rsync_output="$("${rsync_command[@]}")"; then
    err "rsync for verification failed"
    exit 16
  fi

  rm -rf "${verify_temp_dir}"
  # Drop the entry appended above. The index of the last element is the
  # length minus one, and the argument is quoted so that nullglob can't make
  # the "[...]" word vanish before unset sees it.
  local last_index=$(( ${#g_cleanup[@]} - 1 ))
  unset "g_cleanup[${last_index}]"

  # Any output at all means that rsync found a difference.
  if [[ -n "${rsync_output}" ]]; then
    err "verification failed"
    exit 16
  fi
}
# shell_safe_path ensures that |path| is safe to pass to tools as a
# command-line argument. If the first character in |path| is "-", "./" is
# prepended to it. The possibly-modified |path| is output.
shell_safe_path() {
  local path="${1-}"

  # A leading "-" would otherwise be parsed as an option by most tools.
  if [[ "${path:0:1}" = "-" ]]; then
    path="./${path}"
  fi

  printf '%s\n' "${path}"
}
# dirs_contained returns 0 (true) if either of the directory paths |dir1| and
# |dir2| is equal to or located inside the other, and 1 (false) otherwise.
# The comparison is textual, so callers should pass physical (symlink-free)
# absolute paths.
dirs_contained() {
  # Normalize to exactly one trailing "/" so that only whole path components
  # are compared: "/a/b" must not be treated as containing "/a/bc".
  local dir1="${1%/}/"
  local dir2="${2%/}/"

  if [[ "${dir1:0:${#dir2}}" = "${dir2}" ]] ||
     [[ "${dir2:0:${#dir1}}" = "${dir1}" ]]; then
    return 0
  fi

  return 1
}
# usage writes a one-line synopsis of the command line to standard error.
usage() {
  echo "usage: ${ME} old_dir new_dir patch_dir" >& 2
}
# main validates its three arguments (old_dir, new_dir, patch_dir) and the
# DIRDIFFER_EXCLUDE / DIRDIFFER_NO_DIFF patterns, then builds patch_dir with
# make_patch_dir and checks it with verify_patch_dir. Until both succeed,
# patch_dir is registered in g_cleanup so that an early exit removes it.
#
# Exit statuses set here: 3 if old_dir or new_dir is not a directory, 4 if
# patch_dir already exists, 5 if patch_dir's parent is not a directory, 6 if
# any of the directories contains another, 19 for an invalid pattern.
main() {
  local old_dir new_dir patch_dir
  old_dir="$(shell_safe_path "${1}")"
  new_dir="$(shell_safe_path "${2}")"
  patch_dir="$(shell_safe_path "${3}")"

  trap cleanup EXIT HUP INT QUIT TERM

  if ! [[ -d "${old_dir}" ]] || ! [[ -d "${new_dir}" ]]; then
    err "old_dir and new_dir must exist and be directories"
    usage
    exit 3
  fi

  if [[ -e "${patch_dir}" ]]; then
    err "patch_dir must not exist"
    usage
    exit 4
  fi

  local patch_dir_parent
  patch_dir_parent="$(dirname "${patch_dir}")"
  if ! [[ -d "${patch_dir_parent}" ]]; then
    err "patch_dir parent directory must exist and be a directory"
    usage
    exit 5
  fi

  # The weird conditional structure is because the status of the RE comparison
  # needs to be available in ${?} without conflating it with other conditions
  # or negating it. Only a status of 2 from the =~ operator indicates an
  # invalid regular expression.

  if [[ -n "${DIRDIFFER_EXCLUDE}" ]]; then
    if [[ "" =~ ${DIRDIFFER_EXCLUDE} ]]; then
      true
    elif [[ ${?} -eq 2 ]]; then
      err "DIRDIFFER_EXCLUDE contains an invalid regular expression"
      exit 19
    fi
  fi

  if [[ -n "${DIRDIFFER_NO_DIFF}" ]]; then
    if [[ "" =~ ${DIRDIFFER_NO_DIFF} ]]; then
      true
    elif [[ ${?} -eq 2 ]]; then
      err "DIRDIFFER_NO_DIFF contains an invalid regular expression"
      exit 19
    fi
  fi

  # Compare physical paths so that symbolic links can't hide containment.
  # patch_dir doesn't exist yet, so resolve its parent and append its name.
  local old_dir_phys new_dir_phys patch_dir_parent_phys patch_dir_phys
  old_dir_phys="$(cd "${old_dir}" && pwd -P)"
  new_dir_phys="$(cd "${new_dir}" && pwd -P)"
  patch_dir_parent_phys="$(cd "${patch_dir_parent}" && pwd -P)"
  patch_dir_phys="${patch_dir_parent_phys}/$(basename "${patch_dir}")"

  if dirs_contained "${old_dir_phys}" "${new_dir_phys}" ||
     dirs_contained "${old_dir_phys}" "${patch_dir_phys}" ||
     dirs_contained "${new_dir_phys}" "${patch_dir_phys}"; then
    err "directories must not contain one another"
    usage
    exit 6
  fi

  # From here on, an early exit must remove the partial patch_dir.
  g_cleanup+=("${patch_dir}")

  make_patch_dir "${old_dir}" "${new_dir}" "${patch_dir}"

  verify_patch_dir "${old_dir}" "${new_dir}" "${patch_dir}"

  # Success: patch_dir must survive, so nothing may remain registered for
  # removal. Clearing the whole list is deliberate; the previous
  # |unset g_cleanup[${#g_cleanup[@]}]| addressed one element past the end of
  # the array and so removed nothing.
  g_cleanup=()
  trap - EXIT
}
539 if [[ ${#} -ne 3 ]]; then