/*
 * git-gui: add a simple msgfmt replacement
 * [git/platforms.git] / convert.c
 * blob b2885522802df721fed2025b02ec783d08beb951
 */
1 #include "cache.h"
2 #include "attr.h"
3 #include "run-command.h"
/*
 * convert.c - convert a file when checking it out and checking it in.
 *
 * This should use the pathname to decide whether it wants to do some
 * more interesting conversions (automatic gzip/unzip, general format
 * conversions, etc.), but by default it just does automatic CRLF<->LF
 * translation when the "auto_crlf" option is set.
 */
/*
 * Per-path line-ending conversion modes, as selected from the "crlf"
 * attribute:
 *
 *   CRLF_GUESS  - no explicit setting; use heuristics to decide whether
 *                 the content looks like convertible text.
 *   CRLF_BINARY - attribute is false; never convert.
 *   CRLF_TEXT   - attribute is true; convert without running the
 *                 text/binary heuristics.
 *   CRLF_INPUT  - attribute is the string "input"; convert on the way
 *                 into git only, never on the way to the work tree.
 */
#define CRLF_GUESS	(-1)
#define CRLF_BINARY	0
#define CRLF_TEXT	1
#define CRLF_INPUT	2
/*
 * Byte-class statistics for one buffer, used to decide whether the
 * content is text that may have its line endings converted.
 */
struct text_stat {
	/* CR, LF and CRLF counts (a CRLF pair also counts as one CR and one LF) */
	unsigned cr, lf, crlf;

	/* These are just approximations! */
	unsigned printable, nonprintable;
};
27 static void gather_stats(const char *buf, unsigned long size, struct text_stat *stats)
29 unsigned long i;
31 memset(stats, 0, sizeof(*stats));
33 for (i = 0; i < size; i++) {
34 unsigned char c = buf[i];
35 if (c == '\r') {
36 stats->cr++;
37 if (i+1 < size && buf[i+1] == '\n')
38 stats->crlf++;
39 continue;
41 if (c == '\n') {
42 stats->lf++;
43 continue;
45 if (c == 127)
46 /* DEL */
47 stats->nonprintable++;
48 else if (c < 32) {
49 switch (c) {
50 /* BS, HT, ESC and FF */
51 case '\b': case '\t': case '\033': case '\014':
52 stats->printable++;
53 break;
54 default:
55 stats->nonprintable++;
58 else
59 stats->printable++;
62 // If file ends with EOF then don't count this EOF as non-printable
63 if ( size >= 1 && buf[size-1] == '\032' )
64 stats->nonprintable--;
68 * The same heuristics as diff.c::mmfile_is_binary()
70 static int is_binary(unsigned long size, struct text_stat *stats)
73 if ((stats->printable >> 7) < stats->nonprintable)
74 return 1;
76 * Other heuristics? Average line length might be relevant,
77 * as might LF vs CR vs CRLF counts..
79 * NOTE! It might be normal to have a low ratio of CRLF to LF
80 * (somebody starts with a LF-only file and edits it with an editor
81 * that adds CRLF only to lines that are added..). But do we
82 * want to support CR-only? Probably not.
84 return 0;
87 static char *crlf_to_git(const char *path, const char *src, unsigned long *sizep, int action)
89 char *buffer, *dst;
90 unsigned long size, nsize;
91 struct text_stat stats;
93 if ((action == CRLF_BINARY) || !auto_crlf)
94 return NULL;
96 size = *sizep;
97 if (!size)
98 return NULL;
100 gather_stats(src, size, &stats);
102 /* No CR? Nothing to convert, regardless. */
103 if (!stats.cr)
104 return NULL;
106 if (action == CRLF_GUESS) {
108 * We're currently not going to even try to convert stuff
109 * that has bare CR characters. Does anybody do that crazy
110 * stuff?
112 if (stats.cr != stats.crlf)
113 return NULL;
116 * And add some heuristics for binary vs text, of course...
118 if (is_binary(size, &stats))
119 return NULL;
123 * Ok, allocate a new buffer, fill it in, and return it
124 * to let the caller know that we switched buffers.
126 nsize = size - stats.crlf;
127 buffer = xmalloc(nsize);
128 *sizep = nsize;
130 dst = buffer;
131 if (action == CRLF_GUESS) {
133 * If we guessed, we already know we rejected a file with
134 * lone CR, and we can strip a CR without looking at what
135 * follow it.
137 do {
138 unsigned char c = *src++;
139 if (c != '\r')
140 *dst++ = c;
141 } while (--size);
142 } else {
143 do {
144 unsigned char c = *src++;
145 if (! (c == '\r' && (1 < size && *src == '\n')))
146 *dst++ = c;
147 } while (--size);
150 return buffer;
153 static char *crlf_to_worktree(const char *path, const char *src, unsigned long *sizep, int action)
155 char *buffer, *dst;
156 unsigned long size, nsize;
157 struct text_stat stats;
158 unsigned char last;
160 if ((action == CRLF_BINARY) || (action == CRLF_INPUT) ||
161 auto_crlf <= 0)
162 return NULL;
164 size = *sizep;
165 if (!size)
166 return NULL;
168 gather_stats(src, size, &stats);
170 /* No LF? Nothing to convert, regardless. */
171 if (!stats.lf)
172 return NULL;
174 /* Was it already in CRLF format? */
175 if (stats.lf == stats.crlf)
176 return NULL;
178 if (action == CRLF_GUESS) {
179 /* If we have any bare CR characters, we're not going to touch it */
180 if (stats.cr != stats.crlf)
181 return NULL;
183 if (is_binary(size, &stats))
184 return NULL;
188 * Ok, allocate a new buffer, fill it in, and return it
189 * to let the caller know that we switched buffers.
191 nsize = size + stats.lf - stats.crlf;
192 buffer = xmalloc(nsize);
193 *sizep = nsize;
194 last = 0;
196 dst = buffer;
197 do {
198 unsigned char c = *src++;
199 if (c == '\n' && last != '\r')
200 *dst++ = '\r';
201 *dst++ = c;
202 last = c;
203 } while (--size);
205 return buffer;
208 static int filter_buffer(const char *path, const char *src,
209 unsigned long size, const char *cmd)
212 * Spawn cmd and feed the buffer contents through its stdin.
214 struct child_process child_process;
215 int pipe_feed[2];
216 int write_err, status;
218 memset(&child_process, 0, sizeof(child_process));
220 if (pipe(pipe_feed) < 0) {
221 error("cannot create pipe to run external filter %s", cmd);
222 return 1;
225 child_process.pid = fork();
226 if (child_process.pid < 0) {
227 error("cannot fork to run external filter %s", cmd);
228 close(pipe_feed[0]);
229 close(pipe_feed[1]);
230 return 1;
232 if (!child_process.pid) {
233 dup2(pipe_feed[0], 0);
234 close(pipe_feed[0]);
235 close(pipe_feed[1]);
236 execlp("sh", "sh", "-c", cmd, NULL);
237 return 1;
239 close(pipe_feed[0]);
241 write_err = (write_in_full(pipe_feed[1], src, size) < 0);
242 if (close(pipe_feed[1]))
243 write_err = 1;
244 if (write_err)
245 error("cannot feed the input to external filter %s", cmd);
247 status = finish_command(&child_process);
248 if (status)
249 error("external filter %s failed %d", cmd, -status);
250 return (write_err || status);
253 static char *apply_filter(const char *path, const char *src,
254 unsigned long *sizep, const char *cmd)
257 * Create a pipeline to have the command filter the buffer's
258 * contents.
260 * (child --> cmd) --> us
262 const int SLOP = 4096;
263 int pipe_feed[2];
264 int status;
265 char *dst;
266 unsigned long dstsize, dstalloc;
267 struct child_process child_process;
269 if (!cmd)
270 return NULL;
272 memset(&child_process, 0, sizeof(child_process));
274 if (pipe(pipe_feed) < 0) {
275 error("cannot create pipe to run external filter %s", cmd);
276 return NULL;
279 fflush(NULL);
280 child_process.pid = fork();
281 if (child_process.pid < 0) {
282 error("cannot fork to run external filter %s", cmd);
283 close(pipe_feed[0]);
284 close(pipe_feed[1]);
285 return NULL;
287 if (!child_process.pid) {
288 dup2(pipe_feed[1], 1);
289 close(pipe_feed[0]);
290 close(pipe_feed[1]);
291 exit(filter_buffer(path, src, *sizep, cmd));
293 close(pipe_feed[1]);
295 dstalloc = *sizep;
296 dst = xmalloc(dstalloc);
297 dstsize = 0;
299 while (1) {
300 ssize_t numread = xread(pipe_feed[0], dst + dstsize,
301 dstalloc - dstsize);
303 if (numread <= 0) {
304 if (!numread)
305 break;
306 error("read from external filter %s failed", cmd);
307 free(dst);
308 dst = NULL;
309 break;
311 dstsize += numread;
312 if (dstalloc <= dstsize + SLOP) {
313 dstalloc = dstsize + SLOP;
314 dst = xrealloc(dst, dstalloc);
317 if (close(pipe_feed[0])) {
318 error("read from external filter %s failed", cmd);
319 free(dst);
320 dst = NULL;
323 status = finish_command(&child_process);
324 if (status) {
325 error("external filter %s failed %d", cmd, -status);
326 free(dst);
327 dst = NULL;
330 if (dst)
331 *sizep = dstsize;
332 return dst;
/*
 * One user-configured conversion driver ("filter.<name>.*").
 *
 * Drivers form a singly linked list headed by user_convert;
 * user_convert_tail points at the "next" slot where the following
 * driver is to be appended.
 */
static struct convert_driver {
	const char *name;		/* the <name> part of the config key */
	struct convert_driver *next;
	char *smudge;			/* command line used when writing to the work tree, or NULL */
	char *clean;			/* command line used when taking content into git, or NULL */
} *user_convert, **user_convert_tail;
342 static int read_convert_config(const char *var, const char *value)
344 const char *ep, *name;
345 int namelen;
346 struct convert_driver *drv;
349 * External conversion drivers are configured using
350 * "filter.<name>.variable".
352 if (prefixcmp(var, "filter.") || (ep = strrchr(var, '.')) == var + 6)
353 return 0;
354 name = var + 7;
355 namelen = ep - name;
356 for (drv = user_convert; drv; drv = drv->next)
357 if (!strncmp(drv->name, name, namelen) && !drv->name[namelen])
358 break;
359 if (!drv) {
360 char *namebuf;
361 drv = xcalloc(1, sizeof(struct convert_driver));
362 namebuf = xmalloc(namelen + 1);
363 memcpy(namebuf, name, namelen);
364 namebuf[namelen] = 0;
365 drv->name = namebuf;
366 drv->next = NULL;
367 *user_convert_tail = drv;
368 user_convert_tail = &(drv->next);
371 ep++;
374 * filter.<name>.smudge and filter.<name>.clean specifies
375 * the command line:
377 * command-line
379 * The command-line will not be interpolated in any way.
382 if (!strcmp("smudge", ep)) {
383 if (!value)
384 return error("%s: lacks value", var);
385 drv->smudge = strdup(value);
386 return 0;
389 if (!strcmp("clean", ep)) {
390 if (!value)
391 return error("%s: lacks value", var);
392 drv->clean = strdup(value);
393 return 0;
395 return 0;
398 static void setup_convert_check(struct git_attr_check *check)
400 static struct git_attr *attr_crlf;
401 static struct git_attr *attr_ident;
402 static struct git_attr *attr_filter;
404 if (!attr_crlf) {
405 attr_crlf = git_attr("crlf", 4);
406 attr_ident = git_attr("ident", 5);
407 attr_filter = git_attr("filter", 6);
408 user_convert_tail = &user_convert;
409 git_config(read_convert_config);
411 check[0].attr = attr_crlf;
412 check[1].attr = attr_ident;
413 check[2].attr = attr_filter;
/*
 * Count the ident keywords in the first "size" bytes of "cp".
 *
 * Both the collapsed form "$Id$" and the expanded form "$Id: ... $"
 * count as one each:
 *
 * "$Id: 0000000000000000000000000000000000000000 $" <=> "$Id$"
 *
 * An expanded keyword whose closing '$' is missing is not counted.
 */
static int count_ident(const char *cp, unsigned long size)
{
	int cnt = 0;
	char ch;

	while (size) {
		ch = *cp++;
		size--;
		if (ch != '$')
			continue;
		/* Need at least "Id" plus one more byte after the '$'. */
		if (size < 3)
			break;
		if (memcmp("Id", cp, 2))
			continue;
		ch = cp[2];
		cp += 3;
		size -= 3;
		if (ch == '$')
			cnt++; /* $Id$ */
		if (ch != ':')
			continue;

		/*
		 * "$Id: ... "; scan up to the closing dollar sign and discard.
		 */
		while (size) {
			ch = *cp++;
			size--;
			if (ch == '$') {
				cnt++;
				break;
			}
		}
	}
	return cnt;
}
/*
 * Collapse expanded ident keywords for content going into git:
 * every "$Id: ... $" becomes "$Id$".
 *
 * Returns NULL when "ident" is zero or the content holds no ident
 * keyword.  Otherwise returns a freshly allocated buffer with the
 * rewritten content and stores its length in *sizep; the caller owns
 * the buffer.  "path" is not consulted here.
 */
static char *ident_to_git(const char *path, const char *src, unsigned long *sizep, int ident)
{
	int cnt;
	unsigned long size;
	char *dst, *buf;

	if (!ident)
		return NULL;
	size = *sizep;
	cnt = count_ident(src, size);
	if (!cnt)
		return NULL;
	/* Collapsing only ever shrinks the content, so "size" bytes suffice. */
	buf = xmalloc(size);

	for (dst = buf; size; size--) {
		char ch = *src++;
		*dst++ = ch;
		/*
		 * "size" still counts the byte just consumed, so only
		 * size - 1 bytes remain at src.  Matching "Id:" needs three
		 * of them, hence 4 <= size; anything less would read past
		 * the end of the buffer.
		 */
		if ((ch == '$') && (4 <= size) &&
		    !memcmp("Id:", src, 3)) {
			/* Bytes left after "Id:" in which to find the closing '$'. */
			unsigned long rem = size - 4;
			const char *cp = memchr(src + 3, '$', rem);

			/* Unterminated keyword: copy it through unchanged. */
			if (!cp)
				continue;
			memcpy(dst, "Id$", 3);
			dst += 3;
			/* Skip everything up to and including the closing '$'. */
			cp++;
			size -= (cp - src);
			src = cp;
		}
	}

	*sizep = dst - buf;
	return buf;
}
/*
 * Expand ident keywords for content going to the work tree: every
 * "$Id$" - and every already expanded "$Id: ... $" - becomes
 * "$Id: <40 hex digits> $", where the digits are the hex form of the
 * value hash_sha1_file() computes for the unexpanded content as a
 * "blob".
 *
 * Returns NULL when "ident" is zero or the content holds no ident
 * keyword.  Otherwise returns a freshly allocated buffer with the
 * rewritten content and stores its length in *sizep; the caller owns
 * the buffer.  "path" is not consulted here.
 */
static char *ident_to_worktree(const char *path, const char *src, unsigned long *sizep, int ident)
{
	int cnt;
	unsigned long size;
	char *dst, *buf;
	unsigned char sha1[20];

	if (!ident)
		return NULL;

	size = *sizep;
	cnt = count_ident(src, size);
	if (!cnt)
		return NULL;

	hash_sha1_file(src, size, "blob", sha1);
	/* "$Id$" (4 bytes) grows to "$Id: <40 hex> $" (47 bytes): 43 extra each. */
	buf = xmalloc(size + cnt * 43);

	for (dst = buf; size; size--) {
		const char *cp;
		/* Fetch next source character, move the pointer on */
		char ch = *src++;
		/* Copy the current character to the destination */
		*dst++ = ch;
		/*
		 * "size" still counts the byte just consumed, so size - 1
		 * bytes remain at src.  A keyword needs "Id" plus at least
		 * one more byte (':' or '$'), i.e. size >= 4; with less we
		 * must not look at src[2].
		 */
		if ((ch != '$') || (size < 4) || memcmp("Id", src, 2))
			continue;

		/*
		 * It's possible that an expanded Id has crept its way into the
		 * repository, we cope with that by stripping the expansion out
		 */
		if (src[2] == ':') {
			/*
			 * Expanded keywords have "$Id:" at the front; find the
			 * closing '$' among the size - 4 bytes after the ':'.
			 */
			cp = memchr(src + 3, '$', size - 4);
			/*
			 * No closing '$': this is an incomplete keyword, so
			 * don't run the expansion.
			 */
			if (!cp)
				continue;
		} else if (src[2] == '$')
			cp = src + 2;
		else
			/*
			 * Anything other than "$Id:XXX$" or "$Id$" and we skip
			 * the expansion.
			 */
			continue;

		/* cp is now pointing at the last $ of the keyword */

		memcpy(dst, "Id: ", 4);
		dst += 4;
		memcpy(dst, sha1_to_hex(sha1), 40);
		dst += 40;
		*dst++ = ' ';

		/* Adjust for the characters we've discarded */
		size -= (cp - src);
		src = cp;

		/* Copy the final "$" */
		*dst++ = *src++;
		size--;
	}

	*sizep = dst - buf;
	return buf;
}
589 static int git_path_check_crlf(const char *path, struct git_attr_check *check)
591 const char *value = check->value;
593 if (ATTR_TRUE(value))
594 return CRLF_TEXT;
595 else if (ATTR_FALSE(value))
596 return CRLF_BINARY;
597 else if (ATTR_UNSET(value))
599 else if (!strcmp(value, "input"))
600 return CRLF_INPUT;
601 return CRLF_GUESS;
604 static struct convert_driver *git_path_check_convert(const char *path,
605 struct git_attr_check *check)
607 const char *value = check->value;
608 struct convert_driver *drv;
610 if (ATTR_TRUE(value) || ATTR_FALSE(value) || ATTR_UNSET(value))
611 return NULL;
612 for (drv = user_convert; drv; drv = drv->next)
613 if (!strcmp(value, drv->name))
614 return drv;
615 return NULL;
618 static int git_path_check_ident(const char *path, struct git_attr_check *check)
620 const char *value = check->value;
622 return !!ATTR_TRUE(value);
625 char *convert_to_git(const char *path, const char *src, unsigned long *sizep)
627 struct git_attr_check check[3];
628 int crlf = CRLF_GUESS;
629 int ident = 0;
630 char *filter = NULL;
631 char *buf, *buf2;
633 setup_convert_check(check);
634 if (!git_checkattr(path, ARRAY_SIZE(check), check)) {
635 struct convert_driver *drv;
636 crlf = git_path_check_crlf(path, check + 0);
637 ident = git_path_check_ident(path, check + 1);
638 drv = git_path_check_convert(path, check + 2);
639 if (drv && drv->clean)
640 filter = drv->clean;
643 buf = apply_filter(path, src, sizep, filter);
645 buf2 = crlf_to_git(path, buf ? buf : src, sizep, crlf);
646 if (buf2) {
647 free(buf);
648 buf = buf2;
651 buf2 = ident_to_git(path, buf ? buf : src, sizep, ident);
652 if (buf2) {
653 free(buf);
654 buf = buf2;
657 return buf;
660 char *convert_to_working_tree(const char *path, const char *src, unsigned long *sizep)
662 struct git_attr_check check[3];
663 int crlf = CRLF_GUESS;
664 int ident = 0;
665 char *filter = NULL;
666 char *buf, *buf2;
668 setup_convert_check(check);
669 if (!git_checkattr(path, ARRAY_SIZE(check), check)) {
670 struct convert_driver *drv;
671 crlf = git_path_check_crlf(path, check + 0);
672 ident = git_path_check_ident(path, check + 1);
673 drv = git_path_check_convert(path, check + 2);
674 if (drv && drv->smudge)
675 filter = drv->smudge;
678 buf = ident_to_worktree(path, src, sizep, ident);
680 buf2 = crlf_to_worktree(path, buf ? buf : src, sizep, crlf);
681 if (buf2) {
682 free(buf);
683 buf = buf2;
686 buf2 = apply_filter(path, buf ? buf : src, sizep, filter);
687 if (buf2) {
688 free(buf);
689 buf = buf2;
692 return buf;
/*
 * Read the object "sha1" with read_sha1_file() and, when "mode" says it
 * is a regular file, run the result through convert_to_working_tree()
 * for "path".
 *
 * Returns whatever read_sha1_file() returned when that is NULL, when
 * the mode is not a regular file, or when no conversion applied;
 * otherwise frees the raw buffer and returns the converted one, with
 * *size updated.  *type is filled in by read_sha1_file().
 * NOTE(review): the returned buffer is presumably owned by the caller -
 * confirm against read_sha1_file()'s contract.
 */
void *convert_sha1_file(const char *path, const unsigned char *sha1,
			unsigned int mode, enum object_type *type,
			unsigned long *size)
{
	void *buffer = read_sha1_file(sha1, type, size);
	if (S_ISREG(mode) && buffer) {
		void *converted = convert_to_working_tree(path, buffer, size);
		if (converted) {
			free(buffer);
			buffer = converted;
		}
	}
	return buffer;
}