3 git-export-filter.c -- filter/transform git fast-export data streams
4 Copyright (C) 2013,2014,2019 Kyle J. McKay. All rights reserved.
6 This program is free software; you can redistribute it and/or modify
7 it under the terms of the GNU General Public License as published by
8 the Free Software Foundation; either version 2 of the License, or
9 (at your option) any later version.
11 This program is distributed in the hope that it will be useful,
12 but WITHOUT ANY WARRANTY; without even the implied warranty of
13 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 GNU General Public License for more details.
16 You should have received a copy of the GNU General Public License along
17 with this program; if not, write to the Free Software Foundation, Inc.,
18 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
29 #define COPYSIZE 65536
31 #define SPACETAB " \t"
32 #define DIGITS "0123456789"
34 typedef struct transform_s
{
39 static const char *const gHelp
=
40 #include "git-export-help.inc"
43 static const char *const gUsage
=
44 "git-export-filter [--authors-file file] [--branches-file file]\n"
45 " [--convert-tagger id] [--require-authors] [--trunk-is-master]\n"
46 " [--strip-at-suffix] [--expand-renames] [in] > out\n"
47 "(use git-export-filter -v -h for detailed help)\n";
49 static const char *const gVersion
=
50 "git-export-filter version 1.5.1\n";
52 static const char *me
= "git-export-filter";
54 static const char *authorsfile
= NULL
;
55 static const char *branchesfile
= NULL
;
56 static const char *convertid
= NULL
;
57 static int opt_verbose
= 0;
59 static int opt_version
= 0;
60 static int opt_help
= 0;
61 static int opt_require
= 0;
62 static int opt_trunk_is_master
= 0;
63 static int opt_strip_at
= 0;
64 static int opt_no_renames
= 0;
65 static int opt_names
= 0;
67 static char *pushline
= NULL
;
68 static char *copybuff
;
70 int (*fout
)(FILE *out
, const char *fmt
, ...);
71 size_t (*writeout
)(const void *ptr
, size_t size
, size_t nitems
, FILE *out
);
73 static void processfile(FILE *in
, FILE *out
,
74 const transform_t
*authors
, size_t acount
,
75 const transform_t
*branches
, size_t bcount
,
76 const char *convertid
);
78 static void setupme(const char *start
)
80 if (start
&& *start
) {
81 const char *last
= strrchr(start
, '/');
89 static void die(const char *fmt
, ...)
98 fprintf(stderr
, "%s: ", me
);
99 vfprintf(stderr
, fmt
, args
);
102 if (!len
|| fmt
[len
-1] != '\n')
103 fprintf(stderr
, "\n");
108 static int cmpxform(const void *_a
, const void *_b
)
110 const transform_t
*a
= (const transform_t
*)_a
;
111 const transform_t
*b
= (const transform_t
*)_b
;
112 return strcmp(a
->from
, b
->from
);
115 static void trimback(char *str
, const char *chars
)
120 while (strchr(chars
, str
[--len
]))
124 static int read_transform_file(const char *type
, FILE *f
, transform_t
**ans
)
126 transform_t
*xform
= NULL
;
129 char line
[MAXLINE
+2];
130 sprintf(fmt
, "%%%d[^\r\n]", MAXLINE
+1);
134 e
= fscanf(f
, fmt
, line
);
138 if (strlen(line
) > MAXLINE
)
139 die("%s file line exceeded %d characters", type
, MAXLINE
);
140 str
= line
+ strspn(line
, SPACETAB
);
141 if (*str
&& *str
!= '#') {
142 size_t idlen
= strcspn(str
, "=");
145 die("invalid %s line (no '='): %s", type
, str
);
147 die("invalid %s line (empty before '='): %s", type
, str
);
149 from
= str
+ idlen
+ 1;
150 from
+= strspn(from
, SPACETAB
);
152 die("invalid %s line (empty after '='): %s", type
, str
);
153 trimback(str
, SPACETAB
);
154 trimback(from
, SPACETAB
);
156 fprintf(stderr
, "FROM: %s TO: %s\n", str
, from
);
157 xform
= realloc(xform
, sizeof(transform_t
) * (cnt
+ 1));
159 die("out of memory allocating %s array", type
);
160 xform
[cnt
].from
= (char *)malloc(idlen
+ 1 + strlen(from
) + 1);
161 if (!xform
[cnt
].from
)
162 die("out of memory allocating %s array", type
);
163 memcpy(xform
[cnt
].from
, str
, idlen
+ 1);
164 xform
[cnt
].to
= xform
[cnt
].from
+ idlen
+ 1;
165 strcpy(xform
[cnt
].to
, from
);
169 e
= fscanf(f
, "%*[\r\n]");
173 qsort(xform
, cnt
, sizeof(transform_t
), cmpxform
);
182 static int foutnone(FILE *out
, const char *fmt
, ...)
189 size_t writeoutnone(const void *ptr
, size_t size
, size_t nitems
, FILE *out
)
197 int main(int argc
, char *argv
[])
199 transform_t
*authors
= NULL
;
201 transform_t
*branches
= NULL
;
203 FILE *inbinary
= freopen(NULL
, "rb", stdin
);
204 FILE *outbinary
= freopen(NULL
, "wb", stdout
);
211 copybuff
= (char *)malloc(COPYSIZE
);
213 die("out of memory allocating copy buffer");
215 die("freopen(NULL, \"rb\", stdin) failed");
217 die("freopen(NULL, \"wb\", stdout) failed");
218 for (; optind
< argc
; ++optind
) {
219 #define A argv[optind]
220 if (strcmp(A
, "--authors-file") == 0 || strcmp(A
, "-A") == 0) {
221 if (++optind
>= argc
|| !A
|| !*A
)
222 die("--authors-file requires a filename argument");
226 if (strncmp(A
, "--authors-file=", 15) == 0) {
227 const char *arg
= A
+ 15;
229 die("--authors-file requires a filename argument");
233 if (strcmp(A
, "--branches-file") == 0) {
234 if (++optind
>= argc
|| !A
|| !*A
)
235 die("--branches-file requires a filename argument");
239 if (strncmp(A
, "--branches-file=", 16) == 0) {
240 const char *arg
= A
+ 16;
242 die("--branches-file requires a filename argument");
246 if (strcmp(A
, "--convert-tagger") == 0) {
248 die("--convert-tagger may only be given once");
249 if (++optind
>= argc
|| !A
|| !*A
)
250 die("--convert-tagger requires an argument");
254 if (strncmp(A
, "--convert-tagger=", 17) == 0) {
255 const char *arg
= A
+ 17;
257 die("--convert-tagger may only be given once");
259 die("--convert-tagger requires an argument");
263 if (!strcmp(A
, "--require-authors")) {
267 if (!strcmp(A
, "--trunk-is-master")) {
268 opt_trunk_is_master
= 1;
271 if (!strcmp(A
, "--strip-at-suffix")) {
275 if (!strcmp(A
, "--expand-renames")) {
279 if (!strcmp(A
, "--names")) {
282 writeout
= writeoutnone
;
285 if (!strcmp(A
, "-V") || !strcmp(A
, "--version")) {
289 if (!strcmp(A
, "-v") || !strcmp(A
, "--verbose")) {
293 if (!strcmp(A
, "-h") || !strcmp(A
, "--help")) {
297 if (!strcmp(A
, "-d") || !strcmp(A
, "--debug")) {
301 if (strcmp(A
, "--") == 0) {
305 if (*A
!= '-' || !A
[1])
307 die("unknown option: %s", A
);
310 if (optind
+ 1 < argc
)
311 die("no more than one non-option argument allowed (try -h)");
312 if (optind
+ 1 == argc
&& strcmp(argv
[optind
], "-")) {
313 inbinary
= freopen(argv
[optind
], "rb", inbinary
);
315 die("cannot open file %s", argv
[optind
]);
318 printf("%s", gVersion
);
320 printf("%s", opt_verbose
? gHelp
: gUsage
);
321 if (opt_version
|| opt_help
)
323 if (opt_require
&& !authorsfile
)
324 die("--require-authors requires the --authors-file option");
325 if (opt_names
&& (opt_require
|| authorsfile
|| branchesfile
|| convertid
||
326 opt_trunk_is_master
|| opt_strip_at
|| opt_no_renames
))
327 die("--names may not be used together with any other options");
329 FILE *af
= fopen(authorsfile
, "rb");
331 die("cannot open authors file: %s", authorsfile
);
332 acount
= read_transform_file("authors", af
, &authors
);
335 die("invalid authors file format: %s", authorsfile
);
336 if (debug
&& acount
) {
338 for (i
=0; i
<acount
; ++i
)
339 fprintf(stderr
, "%s=%s\n", authors
[i
].from
, authors
[i
].to
);
343 FILE *bf
= fopen(branchesfile
, "rb");
345 die("cannot open branches file: %s", branchesfile
);
346 bcount
= read_transform_file("branches", bf
, &branches
);
349 die("invalid branches file format: %s", branchesfile
);
350 if (debug
&& bcount
) {
352 for (i
=0; i
<bcount
; ++i
)
353 fprintf(stderr
, "%s->%s\n", branches
[i
].from
, branches
[i
].to
);
356 if (opt_trunk_is_master
) {
357 branches
= realloc(branches
, sizeof(transform_t
) * (bcount
+ 1));
359 die("out of memory allocating branches array");
360 branches
[bcount
].from
= "refs/heads/trunk";
361 branches
[bcount
].to
= "refs/heads/master";
363 qsort(branches
, bcount
, sizeof(transform_t
), cmpxform
);
366 processfile(inbinary
, outbinary
, authors
, (size_t)acount
, branches
,
367 (size_t)bcount
, convertid
);
372 static char *nextline(FILE *in
)
374 static char line
[MAXLINE
+2];
379 char *ans
= pushline
;
384 sprintf(fmt
, "%%%d[^\r\n]", MAXLINE
+1);
386 e
= fscanf(in
, fmt
, line
);
387 if (e
< 0 && !feof(in
))
388 die("error reading input");
391 if (strlen(line
) > MAXLINE
)
392 die("input line exceeded %d characters", MAXLINE
);
395 } while (e
>= 0 && e
!= '\n');
396 if (e
< 0 && !feof(in
))
397 die("error reading input");
398 return line
+ strspn(line
, SPACETAB
);
401 static void processblob(FILE *in
, FILE *out
);
402 static void processcommit(FILE *in
, FILE *out
, const char *ref
,
403 const transform_t
*authors
, size_t acount
,
404 const transform_t
*branches
, size_t bcount
);
405 static void processtag(FILE *in
, FILE *out
, const char *tag
,
406 const transform_t
*authors
, size_t acount
,
407 const transform_t
*branches
, size_t bcount
,
408 const char *convertid
);
409 static void processreset(FILE *in
, FILE *out
, const char *ref
,
410 const transform_t
*branches
, size_t bcount
);
412 static void processfile(FILE *in
, FILE *out
,
413 const transform_t
*authors
, size_t acount
,
414 const transform_t
*branches
, size_t bcount
,
415 const char *convertid
)
421 if (!*line
|| *line
== '#') continue;
422 if (strcmp(line
, "blob") == 0)
423 processblob(in
, out
);
424 else if (strncmp(line
, "commit ", 7) == 0)
425 processcommit(in
, out
, line
+7, authors
, acount
, branches
, bcount
);
426 else if (strncmp(line
, "tag ", 4) == 0)
427 processtag(in
, out
, line
+4, authors
, acount
, branches
, bcount
, convertid
);
428 else if (strncmp(line
, "reset ", 6) == 0)
429 processreset(in
, out
, line
+6, branches
, bcount
);
430 else if (!strcmp(line
, "checkpoint") || !strcmp(line
, "done") ||
431 !strncmp(line
, "progress ", 9))
432 fout(out
, "%s\n\n", line
);
433 else if (!strncmp(line
, "get-mark ", 9) || !strncmp(line
, "cat-blob ", 9) ||
434 !strncmp(line
, "ls ", 3) || !strncmp(line
, "feature ", 8) ||
435 !strncmp(line
, "option ", 7))
436 fout(out
, "%s\n", line
);
438 die("unrecognized input command: %s", line
);
442 static const char *translate(const char *in
, const transform_t
*x
, size_t c
)
444 const transform_t
*t
;
446 search
.from
= (char *)in
;
448 t
= (transform_t
*)bsearch(&search
, x
, c
, sizeof(transform_t
), cmpxform
);
449 return t
? t
->to
: in
;
452 #define translateref(i,x,c) translate((i),(x),(c))
454 static const char *translateuser(const char *in
, const transform_t
*x
, size_t c
)
456 const transform_t
*t
;
458 search
.from
= (char *)in
;
460 t
= (transform_t
*)bsearch(&search
, x
, c
, sizeof(transform_t
), cmpxform
);
461 if (!t
&& opt_strip_at
&& strchr(in
, '@')) {
462 char user
[MAXLINE
+1];
464 *strchr(user
, '@') = 0;
466 t
= (transform_t
*)bsearch(&search
, x
, c
, sizeof(transform_t
), cmpxform
);
468 return t
? t
->to
: in
;
471 static const char *translatetag(const char *in
, const transform_t
*x
, size_t c
)
473 const transform_t
*t
;
474 transform_t search
= {NULL
, NULL
};
475 char *temptag
= malloc(10 + strlen(in
) + 1);
477 die("out of memory");
478 sprintf(temptag
, "refs/tags/%s", in
);
479 search
.from
= temptag
;
480 t
= (transform_t
*)bsearch(&search
, x
, c
, sizeof(transform_t
), cmpxform
);
481 if (!t
|| strncmp(t
->to
, "refs/tags/", 10) != 0)
486 static int splitauthor(char *line
, char **name
, char **email
, char **when
)
488 char *lt
= strchr(line
, '<');
489 char *gt
= strchr(line
, '>');
490 if (!lt
|| !gt
|| gt
<= lt
)
502 static void copydatapart(FILE *in
, FILE *out
, const char *data
, int nolf
)
506 size_t l
= strlen(data
);
507 if (strspn(data
, DIGITS
) == l
) {
510 die("Invalid data line: data %s", data
);
511 dlen
= (size_t)strtol(data
, NULL
, 10);
513 size_t cnt
, amnt
= COPYSIZE
;
516 cnt
= fread(copybuff
, 1, amnt
, in
);
520 if (writeout(copybuff
, cnt
, 1, out
) != 1)
521 die("failed writing data to output");
523 lastchar
= copybuff
[cnt
- 1];
527 die("unexpected EOF reading data %s", data
);
528 if (out
&& (!nolf
|| lastchar
!= '\n'))
530 } else if (l
< 3 || data
[0] != '<' || data
[1] != '<')
531 die("Invalid data line: data %s", data
);
533 fprintf(stderr
, "%s: warning: data << not fully supported\n", me
);
535 const char *line
= nextline(in
);
537 die("unexpected EOF reading data %s", data
);
539 fout(out
, "%s\n", line
);
540 if (strcmp(line
, data
+2) == 0) {
552 static void copydata(FILE *in
, FILE *out
, int nolf
, const char *err
)
554 const char *line
= nextline(in
);
556 err
= "missing data line";
557 if (!line
|| strncmp(line
, "data ", 5) != 0)
560 fout(out
, "%s\n", line
);
561 copydatapart(in
, out
, line
+5, nolf
);
564 static void processblob(FILE *in
, FILE *out
)
566 const char *line
= nextline(in
);
568 die("error reading blob header");
570 if (strncmp(line
, "mark ", 5) == 0) {
571 fout(out
, "%s\n", line
);
574 die("error reading blob header");
576 if (strncmp(line
, "data ", 5) != 0)
577 die("blob missing data line");
578 fout(out
, "%s\n", line
);
579 copydatapart(in
, out
, line
+5, 0);
582 static void processreset(FILE *in
, FILE *out
, const char *ref
,
583 const transform_t
*branches
, size_t bcount
)
586 const char *newref
= translateref(ref
, branches
, bcount
);
587 fout(out
, "reset %s\n", newref
);
589 if (strncmp(line
, "from ", 5) == 0)
590 fout(out
, "%s\n\n", line
);
595 static void processtag(FILE *in
, FILE *out
, const char *tag
,
596 const transform_t
*authors
, size_t acount
,
597 const transform_t
*branches
, size_t bcount
,
598 const char *convertid
)
600 char *name
= NULL
, *email
= NULL
, *when
= NULL
;
602 const char *newref
= translatetag(tag
, branches
, bcount
);
604 char tagline
[MAXLINE
+1];
605 char fromline
[MAXLINE
+1];
606 sprintf(tagline
, "tag %s\n", newref
);
608 if (!line
|| strncmp(line
, "from ", 5) != 0)
609 die("tag missing from line");
610 sprintf(fromline
, "%s\n", line
);
612 if (!line
|| strncmp(line
, "tagger ", 7) != 0)
613 die("tag missing tagger line");
614 if (!splitauthor(line
+7, &name
, &email
, &when
))
615 die("tag has bad tagger line");
617 fprintf(out
, "%s<%s>\n", name
, email
);
618 if (convertid
&& strcmp(convertid
, email
) == 0) {
619 fout(out
, "reset refs/tags/%s", tagline
+4);
620 fout(out
, "%s\n", fromline
);
623 fout(out
, "%s%s", tagline
, fromline
);
624 newauth
= translateuser(email
, authors
, acount
);
625 if (newauth
!= email
)
626 fout(out
, "tagger %s %s\n", newauth
, when
);
629 die("missing authors file author: \"%s\"", email
);
630 fout(out
, "tagger %s<%s> %s\n", name
, email
, when
);
633 copydata(in
, out
, 0, "tag missing data line");
636 static const char *find_second_space(const char *line
)
638 if (!line
|| !*line
|| *line
== ' ' || line
[1] != ' ' || line
[2] == ' ')
641 return strchr(line
+ 2, ' ');
644 line
+= strcspn(line
, "\\\"");
653 return line
[1] == ' ' ? line
+ 1 : NULL
;
657 static int is_inline_modify(const char *line
)
659 if (strncmp(line
, "M ", 2) != 0)
662 line
+= strspn(line
, DIGITS
);
663 return strncmp(line
, " inline ", 8) == 0;
666 static void processcommit(FILE *in
, FILE *out
, const char *ref
,
667 const transform_t
*authors
, size_t acount
,
668 const transform_t
*branches
, size_t bcount
)
670 char *name
= NULL
, *email
= NULL
, *when
= NULL
;
672 const char *newref
= translateref(ref
, branches
, bcount
);
674 fout(out
, "commit %s\n", newref
);
677 die("error reading commit header");
678 if (strncmp(line
, "mark ", 5) == 0) {
679 fout(out
, "%s\n", line
);
682 die("error reading commit header");
684 if (strncmp(line
, "author ", 7) == 0) {
685 if (!splitauthor(line
+7, &name
, &email
, &when
))
686 die("commit has bad author line");
688 fprintf(out
, "%s<%s>\n", name
, email
);
689 newauth
= translateuser(email
, authors
, acount
);
690 if (newauth
!= email
)
691 fout(out
, "author %s %s\n", newauth
, when
);
694 die("missing authors file author: \"%s\"", email
);
695 fout(out
, "author %s<%s> %s\n", name
, email
, when
);
699 die("error reading commit header");
701 if (strncmp(line
, "committer ", 10) != 0)
702 die("commit missing committer line");
703 if (!splitauthor(line
+10, &name
, &email
, &when
))
704 die("commit has bad committer line");
706 fprintf(out
, "%s<%s>\n", name
, email
);
707 newauth
= translateuser(email
, authors
, acount
);
708 if (newauth
!= email
)
709 fout(out
, "committer %s %s\n", newauth
, when
);
712 die("missing authors file author: \"%s\"", email
);
713 fout(out
, "committer %s<%s> %s\n", name
, email
, when
);
715 copydata(in
, out
, 1, "commit missing data line");
718 die("error reading commit header");
719 if (strncmp(line
, "from ", 5) == 0) {
720 fout(out
, "%s\n", line
);
723 die("error reading commit header");
725 while (strncmp(line
, "merge ", 6) == 0) {
726 fout(out
, "%s\n", line
);
729 die("error reading commit header");
731 while (!strcmp(line
, "deleteall") || !strncmp(line
, "M ", 2) ||
732 !strncmp(line
, "D ", 2) || !strncmp(line
, "C ", 2) ||
733 !strncmp(line
, "R ", 2) || !strncmp(line
, "N ", 2) ||
734 !strncmp(line
, "cat-blob ", 9) || !strncmp(line
, "ls ", 3) ||
735 !strncmp(line
, "get-mark ", 9)) {
736 if (!opt_no_renames
|| strncmp(line
, "R ", 2)) {
737 fout(out
, "%s\n", line
);
739 /* expand rename into copy + delete */
740 const char *space2
= find_second_space(line
);
742 die("error reading 'R' line");
743 fout(out
, "C %s\n", line
+2);
744 fout(out
, "D %.*s\n", (int)(space2
- line
) - 2, line
+2);
746 if (strncmp(line
, "N inline", 8) == 0 || is_inline_modify(line
))
747 copydata(in
, out
, 1, "inline N or M missing data line");
750 die("error reading commit header");