2 # reencode file names in git tree objects from Windows system encoding to UTF-8
6 echo "WARNING this script is not intended as a full-fledged and fool-proof
7 migration utility, but rather as a proof of concept or collection of examples.
10 In particular, the script doesn't check or fix any non-ascii names in config
11 files (such as .git/config, .gitignore or .gitmodules). For specific migration
12 tasks, some of the commands in this script may require tweaking.
14 Usage: recodetree <command>
15 recodetree check Checks git repository for non-ascii file names.
16 recodetree preview Same as 'check', but prints file names as if converted
17 from cp$(getcp) to UTF-8.
18 recodetree head Converts the current HEAD from cp$(getcp) to UTF-8.
19 Does not commit, use git status to check results first.
20 recodetree history Converts entire repository history from cp$(getcp) to UTF-8.
21 WARNING: this rewrites the history of the repository.
22 - Make a BACKUP copy of the repository before using this!
23 - Read 'git help filter-branch' for implications of
# Dump the file names recorded anywhere in history with fast-export
# (--no-data: blob contents are not emitted), then let awk keep only the
# names containing at least one byte in the range \200-\377 (non-ascii).
# A matching line is 'M <6-digit mode> <40-hex id> <path>', so the path
# starts at column 51.
git fast-export --no-data --signed-tags=strip \
  --tag-of-filtered-object=drop --all \
  | awk --posix '/^M [0-9]{6} [0-9a-f]{40} .*[\200-\377]/{print substr($0,51);}'
# Convert stdin from the system encoding (code page reported by getcp) to
# UTF-8 on stdout. -c tells iconv to drop characters it cannot convert
# instead of aborting. The substitution is quoted so the encoding name is
# always passed to iconv as a single word.
iconv -c -f "cp$(getcp)" -t utf-8
# Remove every entry from the index (working tree is untouched: --cached).
# The pathspec is quoted so that git, not the shell, expands it; an unquoted
# * would be globbed against the current directory and would miss dotfiles.
git rm -f -r -q --cached --ignore-unmatch -- '*'
# List the commit given as $1 recursively (NUL-terminated records),
# re-encode the listing, and load the result back into the index.
git ls-tree -z -r "$1" |
  recode |
  git update-index -z --index-info
# Rewrite all refs (-- --all); the index of each rewritten commit is
# rebuilt by running 'recodetree filter' as the index filter.
git filter-branch --index-filter 'recodetree filter' -- --all
# Used internally by 'recodetree history': re-encode the tree of the commit
# named by GIT_COMMIT (presumably set by git filter-branch for the commit
# being rewritten -- confirm against 'git help filter-branch').
recode_tree "$GIT_COMMIT"