vpatch: allow mode setting keys to toggle back to 'merge'.
[wiggle/upstream.git] / merge2.c
blob8520082bf55b2af7865670fab2c6afc0ed66401b
1 /*
2 * wiggle - apply rejected patches
4 * Copyright (C) 2005 Neil Brown <neilb@cse.unsw.edu.au>
5 * Copyright (C) 2010 Neil Brown <neilb@suse.de>
8 * This program is free software; you can redistribute it and/or modify
9 * it under the terms of the GNU General Public License as published by
10 * the Free Software Foundation; either version 2 of the License, or
11 * (at your option) any later version.
13 * This program is distributed in the hope that it will be useful,
14 * but WITHOUT ANY WARRANTY; without even the implied warranty of
15 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16 * GNU General Public License for more details.
18 * You should have received a copy of the GNU General Public License
19 * along with this program; if not, write to the Free Software Foundation, Inc.,
20 * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
22 * Author: Neil Brown
23 * Email: <neilb@suse.de>
26 #include "wiggle.h"
29 * Second attempt at merging....
31 * We want to create a mergelist which identifies 'orig' and 'after'
32 * sections (from a and c) and conflicts (which are ranges of a,b,c which
33 * all don't match).
34 * It is also helpful to differentiate 'orig' sections that aren't
35 * matched in 'b' with orig sections that are.
36 * To help with highlighting, it will be useful to know where
37 * the conflicts match the csl lists.
39 * This can all be achieved with a list of (a,b,c,c1,c1) 5-tuples.
40 * If two consecutive differ in more than one of a,b,c, it is a
41 * conflict.
42 * If only 'a' differ, it is un-matched original.
43 * If only 'b' differ, it is matched, unchanged original
44 * If only 'c' differ, it is 1
47 static inline int min(int a, int b)
49 return a < b ? a : b;
52 static int check_alreadyapplied(struct file af, struct file cf,
53 struct merge *m)
55 int i;
56 if (m->al != m->cl)
57 return 0;
58 for (i = 0; i < m->al; i++) {
59 if (af.list[m->a+i].len != cf.list[m->c+i].len)
60 return 0;
61 if (strncmp(af.list[m->a+i].start,
62 cf.list[m->c+i].start,
63 af.list[m->a+i].len) != 0)
64 return 0;
66 if (do_trace) {
67 printf("already applied %d,%d,%d - %d,%d,%d\n",
68 m->a, m->b, m->c, m->al, m->bl, m->cl);
69 printf(" %.10s - %.10s\n", af.list[m->a].start,
70 cf.list[m->c].start);
72 m->type = AlreadyApplied;
73 return 1;
76 /* A 'cut-point' is a location in the merger where it is reasonable
77 * the change the mode of display - between displaying the merger
78 * and displaying the separate streams.
79 * A 'conflict' can only be displayed as separate stream so when
80 * one is found, we need to find a preceeding and trailing cut-point
81 * and enlarge the conflict to that range.
82 * A suitable location is one where all three streams are at a line-end.
84 static int is_cutpoint(struct merge m,
85 struct file af, struct file bf, struct file cf)
87 return ((m.a == 0 || ends_line(af.list[m.a-1])) &&
88 (m.b == 0 || ends_line(bf.list[m.b-1])) &&
89 (m.c == 0 || ends_line(cf.list[m.c-1])));
92 int isolate_conflicts(struct file af, struct file bf, struct file cf,
93 struct csl *csl1, struct csl *csl2, int words,
94 struct merge *m, int show_wiggles)
96 /* A conflict indicates that something is definitely wrong
97 * and so we need to be a bit suspicious of nearby apparent matches.
98 * To display a conflict effectively we expands it's effect to
99 * include any Extraneous, Unmatched, Changed or AlreadyApplied text.
100 * Also, unless 'words', we need to include any partial lines
101 * in the Unchanged text that forms the border of a conflict.
103 * A Changed text may also border a conflict, but it can
104 * only border one conflict (where as an Unchanged can border
105 * a preceeding and a following conflict).
106 * The 'new' section of a Changed text appears in the
107 * conflict as does any part of the original before
108 * a newline.
110 * If 'show_wiggles' is set we treat wiggles like conflicts.
111 * A 'wiggle' is implied by any Extraneous text being ignored,
112 * or any line that has both Changed and Unmatched content.
113 * (Unmatched content where nothing is changed is common and not
114 * really a 'wiggle').
116 * A hunk header is never considered part of a conflict. It
117 * thereby can serve as a separator between conflicts.
119 * We need to ensure there is adequate context for the conflict.
120 * So ensure there are at least 3 newlines in Extraneous or
121 * Unchanged on both sides of a Conflict - but don't go so far
122 * as including a hunk header.
123 * If there are 3, and they are all in 'Unchanged' sections, then
124 * that much context is not really needed - reduce it a bit.
126 int i, j, k;
127 int cnt = 0;
128 int changed = 0;
129 int unmatched = 0;
131 for (i = 0; m[i].type != End; i++)
132 m[i].in_conflict = 0;
134 for (i = 0; m[i].type != End; i++) {
135 if (m[i].type == Changed)
136 changed = 1;
137 if (m[i].type == Unmatched)
138 unmatched = 1;
139 if ((m[i].type == Conflict && m[i].conflict_ignored == 0) ||
140 (show_wiggles && ((changed && unmatched)
141 || m[i].type == Extraneous))) {
142 /* We have a conflict (or wiggle) here.
143 * First search backwards for an Unchanged marking
144 * things as in_conflict. Then find the
145 * cut-point in the Unchanged. If there isn't one,
146 * keep looking.
148 * Then search forward doing the same thing.
150 int newlines = 0;
151 cnt++;
152 m[i].in_conflict = 1;
153 j = i;
154 while (--j >= 0) {
155 if (m[j].type == Extraneous &&
156 bf.list[m[j].b].start[0] == '\0')
157 /* hunk header - not conflict any more */
158 break;
159 if (!m[j].in_conflict) {
160 m[j].in_conflict = 1;
161 m[j].lo = 0;
162 } else if (m[j].type == Changed) {
163 /* This can no longer form a border */
164 m[j].hi = -1;
165 /* We merge these conflicts and stop searching */
166 cnt--;
167 break;
169 if (m[j].type == Extraneous) {
170 for (k = m[j].bl; k > 0; k--)
171 if (ends_line(bf.list[m[j].b+k-1]))
172 newlines++;
175 if (m[j].type == Unchanged || m[j].type == Changed) {
176 /* If we find enough newlines in this section,
177 * then we only really need 1, but would rather
178 * it wasn't the first one. 'firstk' allows us
179 * to track which newline we actually use
181 int firstk = m[j].al+1;
182 if (words) {
183 m[j].hi = m[j].al;
184 break;
186 /* need to find the last line-break, which
187 * might be after the last newline, if there
188 * is one, or might be at the start
190 for (k = m[j].al; k > 0; k--)
191 if (ends_line(af.list[m[j].a+k-1])) {
192 if (firstk >= m[j].al)
193 firstk = k;
194 newlines++;
195 if (newlines >= 3) {
196 k = firstk;
197 break;
200 if (k > 0)
201 m[j].hi = k;
202 else if (is_cutpoint(m[j], af,bf,cf))
203 m[j].hi = 0;
204 else
205 /* no start-of-line found... */
206 m[j].hi = -1;
207 if (m[j].hi > 0 && m[j].type == Changed) {
208 /* this can only work if start is
209 * also a line break */
210 if (is_cutpoint(m[j], af,bf,cf))
211 /* ok */;
212 else
213 m[j].hi = -1;
215 if (m[j].hi >= 0)
216 break;
220 /* now the forward search */
221 newlines = 0;
222 for (j = i+1; m[j].type != End; j++) {
223 if (m[j].type == Extraneous &&
224 bf.list[m[j].b].start[0] == '\0')
225 /* hunk header - not conflict any more */
226 break;
227 m[j].in_conflict = 1;
228 if (m[j].type == Extraneous) {
229 for (k = 0; k < m[j].bl; k++)
230 if (ends_line(bf.list[m[j].b+k]))
231 newlines++;
233 if (m[j].type == Unchanged || m[j].type == Changed) {
234 m[j].hi = m[j].al;
235 if (words) {
236 m[j].lo = 0;
237 break;
239 /* need to find a line-break, which might be at
240 * the very beginning, or might be after the
241 * first newline - if there is one
243 if (is_cutpoint(m[j], af,bf,cf))
244 m[j].lo = 0;
245 else {
246 /* If we find enough newlines in this section,
247 * then we only really need 1, but would rather
248 * it wasn't the first one. 'firstk' allows us
249 * to track which newline we actually use
251 int firstk = -1;
252 for (k = 0 ; k < m[j].al ; k++)
253 if (ends_line(af.list[m[j].a+k])) {
254 if (firstk <= 0)
255 firstk = k;
256 newlines++;
257 if (newlines >= 3) {
258 k = firstk;
259 break;
262 if (firstk >= 0 &&
263 m[j+1].type == Unmatched) {
264 /* If this Unmatched exceeds 3 lines, just stop here */
265 int p;
266 int nl = 0;
267 for (p = 0; p < m[j+1].al ; p++)
268 if (ends_line(af.list[m[j+1].a+p])) {
269 nl++;
270 if (nl > 3)
271 break;
273 if (nl > 3)
274 k = firstk;
276 if (k < m[j].al)
277 m[j].lo = k+1;
278 else
279 /* no start-of-line found */
280 m[j].lo = m[j].al+1;
282 if (m[j].lo <= m[j].al+1 && m[j].type == Changed) {
283 /* this can only work if the end is a line break */
284 if (is_cutpoint(m[j+1], af,bf,cf))
285 /* ok */;
286 else
287 m[j].lo = m[j].al+1;
289 if (m[j].lo < m[j].al+1)
290 break;
293 i = j - 1;
295 if (m[i].al > 0 && ends_line(af.list[m[i].a+m[i].al-1])) {
296 unmatched = 0;
297 changed = 0;
300 return cnt;
303 struct ci make_merger(struct file af, struct file bf, struct file cf,
304 struct csl *csl1, struct csl *csl2, int words,
305 int ignore_already, int show_wiggles)
307 /* find the wiggles and conflicts between csl1 and csl2
309 struct ci rv;
310 int i, l;
311 int a, b, c, c1, c2;
312 int wiggle_found = 0;
314 rv.conflicts = rv.wiggles = rv.ignored = 0;
316 for (i = 0; csl1[i].len; i++)
318 l = i;
319 for (i = 0; csl2[i].len; i++)
321 l += i;
322 /* maybe a bit of slack at each end */
323 l = l * 4 + 10;
325 rv.merger = xmalloc(sizeof(struct merge)*l);
327 a = b = c = c1 = c2 = 0;
328 i = 0;
329 while (1) {
330 int match1, match2;
331 match1 = (a >= csl1[c1].a && b >= csl1[c1].b); /* c1 doesn't match */
332 match2 = (b >= csl2[c2].a && c >= csl2[c2].b);
334 rv.merger[i].a = a;
335 rv.merger[i].b = b;
336 rv.merger[i].c = c;
337 rv.merger[i].c1 = c1;
338 rv.merger[i].c2 = c2;
339 rv.merger[i].in_conflict = 0;
340 rv.merger[i].conflict_ignored = 0;
342 if (!match1 && match2) {
343 /* This is either Unmatched or Extraneous - probably both.
344 * If the match2 is a hunk-header Extraneous, it must
345 * align with an end-of-line in 'a', so adjust endpoint
347 int newa = csl1[c1].a;
348 if (b < bf.elcnt && bf.list[b].start
349 && bf.list[b].start[0] == '\0') {
350 while (newa > a &&
351 !ends_line(af.list[newa-1]))
352 newa--;
353 while (newa < af.elcnt && !(newa == 0 || ends_line(af.list[newa-1])))
354 newa++;
356 if (a < newa) {
357 /* some unmatched text */
358 rv.merger[i].type = Unmatched;
359 rv.merger[i].al = newa - a;
360 rv.merger[i].bl = 0;
361 rv.merger[i].cl = 0;
362 wiggle_found++;
363 } else {
364 int newb;
365 int j;
366 assert(b < csl1[c1].b);
367 /* some Extraneous text */
368 /* length is min of unmatched on left
369 * and matched on right.
370 * However a hunk-header must be an
371 * Extraneous section by itself, so if this
372 * start with one, the length is 1, and if
373 * there is one in the middle, only take the
374 * text up to there for now.
376 rv.merger[i].type = Extraneous;
377 rv.merger[i].al = 0;
378 newb = b +
379 min(csl1[c1].b - b,
380 csl2[c2].len - (b-csl2[c2].a));
381 if (bf.list[b].start[0] == '\0')
382 newb = b + 1;
383 for (j = b; j < newb; j++) {
384 if (bf.list[j].start[0] == '\0') {
385 if (wiggle_found > 1)
386 rv.wiggles++;
387 wiggle_found = 0;
388 if (j > b)
389 newb = j;
390 } else
391 wiggle_found++;
393 rv.merger[i].cl =
394 rv.merger[i].bl = newb - b;
396 } else if (match1 && !match2) {
397 /* some changed text
398 * if 'c' is currently at a suitable cut-point, then
399 * we can look for a triple-cut-point for start.
400 * Also, if csl2[c2].b isn't in a conflict, and is
401 * a suitable cut-point, then we could make a
402 * triple-cut-point for end of a conflict.
405 rv.merger[i].type = Changed;
406 rv.merger[i].bl = min(csl1[c1].b+csl1[c1].len, csl2[c2].a) - b;
407 rv.merger[i].al = rv.merger[i].bl;
408 rv.merger[i].cl = csl2[c2].b - c;
409 } else if (match1 && match2) {
410 /* Some unchanged text
412 rv.merger[i].type = Unchanged;
413 rv.merger[i].bl =
414 min(csl1[c1].len - (b-csl1[c1].b),
415 csl2[c2].len - (b-csl2[c2].a));
416 rv.merger[i].al = rv.merger[i].cl =
417 rv.merger[i].bl;
418 } else {
419 /* must be a conflict.
420 * Move a and c to next match, and b to closest of the two
422 rv.merger[i].type = Conflict;
423 rv.merger[i].al = csl1[c1].a - a;
424 rv.merger[i].cl = csl2[c2].b - c;
425 rv.merger[i].bl = min(csl1[c1].b, csl2[c2].a) - b;
426 if (ignore_already &&
427 check_alreadyapplied(af, cf, &rv.merger[i]))
428 rv.ignored++;
429 else if (rv.merger[i].bl == 0 &&
430 rv.merger[i].cl > 0)
431 /* As the 'before' stream is empty, this
432 * could look like Unmatched in the
433 * original, and an insertion in the
434 * diff. Reporting it like that is
435 * probably more useful that as a full
436 * conflict.
437 * Leave the type for the insertion as
438 * Conflict (not Changed) as there is some
439 * real uncertainty here, but allow the
440 * original to become Unmatched.
442 rv.merger[i].al = 0;
444 a += rv.merger[i].al;
445 b += rv.merger[i].bl;
446 c += rv.merger[i].cl;
447 i++;
449 while (csl1[c1].a + csl1[c1].len <= a && csl1[c1].len)
450 c1++;
451 assert(csl1[c1].b + csl1[c1].len >= b);
452 while (csl2[c2].b + csl2[c2].len <= c && csl2[c2].len)
453 c2++;
454 assert(csl2[c2].a + csl2[c2].len >= b);
455 if (csl1[c1].len == 0 && csl2[c2].len == 0 &&
456 a == csl1[c1].a && b == csl1[c1].b &&
457 b == csl2[c2].a && c == csl2[c2].b)
458 break;
460 rv.merger[i].type = End;
461 rv.merger[i].a = a;
462 rv.merger[i].b = b;
463 rv.merger[i].c = c;
464 rv.merger[i].c1 = c1;
465 rv.merger[i].c2 = c2;
466 rv.merger[i].in_conflict = 0;
467 rv.merger[i].conflict_ignored = 0;
468 assert(i < l);
469 rv.conflicts = isolate_conflicts(af, bf, cf, csl1, csl2, words,
470 rv.merger, show_wiggles);
471 if (wiggle_found)
472 rv.wiggles++;
473 return rv;
476 static void printrange(FILE *out, struct file *f, int start, int len)
478 while (len > 0) {
479 printword(out, f->list[start]);
480 start++;
481 len--;
485 void print_merge(FILE *out, struct file *a, struct file *b, struct file *c,
486 int words, struct merge *merger)
488 struct merge *m;
490 for (m = merger; m->type != End ; m++) {
491 struct merge *cm;
492 if (do_trace)
493 printf("[%s: %d-%d,%d-%d,%d-%d%s(%d,%d)]\n",
494 m->type==Unmatched ? "Unmatched" :
495 m->type==Unchanged ? "Unchanged" :
496 m->type==Extraneous ? "Extraneous" :
497 m->type==Changed ? "Changed" :
498 m->type==AlreadyApplied ? "AlreadyApplied" :
499 m->type==Conflict ? "Conflict":"unknown",
500 m->a, m->a+m->al-1,
501 m->b, m->b+m->bl-1,
502 m->c, m->c+m->cl-1,
503 m->in_conflict ? " in_conflict" : "",
504 m->lo, m->hi);
506 while (m->in_conflict) {
507 /* need to print from 'hi' to 'lo' of next
508 * Unchanged which is < it's hi
510 int found_conflict = 0;
511 int st = 0, st1;
512 if (m->type == Unchanged || m->type == Changed)
513 if (m->hi >= m->lo)
514 st = m->hi;
515 st1 = st;
517 if (m->type == Unchanged)
518 printrange(out, a, m->a+m->lo, m->hi - m->lo);
520 if (do_trace)
521 for (cm = m; cm->in_conflict; cm++) {
522 printf("{%s: %d-%d,%d-%d,%d-%d%s(%d,%d)}\n",
523 cm->type==Unmatched?"Unmatched":
524 cm->type==Unchanged?"Unchanged":
525 cm->type==Extraneous?"Extraneous":
526 cm->type==Changed?"Changed":
527 cm->type==AlreadyApplied?"AlreadyApplied":
528 cm->type==Conflict?"Conflict":"unknown",
529 cm->a, cm->a+cm->al-1,
530 cm->b, cm->b+cm->bl-1,
531 cm->c, cm->c+cm->cl-1,
532 cm->in_conflict ? " in_conflict" : "",
533 cm->lo, cm->hi);
534 if ((cm->type == Unchanged || cm->type == Changed)
535 && cm != m && cm->lo < cm->hi)
536 break;
539 fputs(words ? "<<<---" : "<<<<<<<\n", out);
540 for (cm = m; cm->in_conflict; cm++) {
541 if (cm->type == Conflict)
542 found_conflict = 1;
543 if ((cm->type == Unchanged || cm->type == Changed)
544 && cm != m && cm->lo < cm->hi) {
545 printrange(out, a, cm->a, cm->lo);
546 break;
548 printrange(out, a, cm->a+st1, cm->al-st1);
549 st1 = 0;
551 fputs(words ? "|||" : "|||||||\n", out);
552 st1 = st;
553 for (cm = m; cm->in_conflict; cm++) {
554 if ((cm->type == Unchanged || cm->type == Changed)
555 && cm != m && cm->lo < cm->hi) {
556 printrange(out, b, cm->b, cm->lo);
557 break;
559 printrange(out, b, cm->b+st1, cm->bl-st1);
560 st1 = 0;
562 fputs(words ? "===" : "=======\n", out);
563 st1 = st;
564 for (cm = m; cm->in_conflict; cm++) {
565 if (cm->type == Unchanged &&
566 cm != m && cm->lo < cm->hi) {
567 printrange(out, c, cm->c, cm->lo);
568 break;
570 if (cm->type == Changed)
571 st1 = 0; /* All of result of change must be printed */
572 printrange(out, c, cm->c+st1, cm->cl-st1);
573 st1 = 0;
575 if (!found_conflict) {
576 /* This section was wiggled in successfully,
577 * but full conflict display was requested.
578 * So now print out the wiggled result as well.
580 fputs(words ? "&&&" : "&&&&&&&\n", out);
581 st1 = st;
582 for (cm = m; cm->in_conflict; cm++) {
583 int last = 0;
584 if ((cm->type == Unchanged || cm->type == Changed)
585 && cm != m && cm->lo < cm->hi)
586 last = 1;
587 switch (cm->type) {
588 case Unchanged:
589 case AlreadyApplied:
590 case Unmatched:
591 printrange(out, a, cm->a+st1,
592 last ? cm->lo : cm->al-st1);
593 break;
594 case Extraneous:
595 break;
596 case Changed:
597 printrange(out, c, cm->c+st1,
598 last ? cm->lo : cm->cl-st1);
599 break;
600 case Conflict:
601 case End:
602 assert(0);
604 if (last)
605 break;
606 st1 = 0;
609 fputs(words ? "--->>>" : ">>>>>>>\n", out);
610 m = cm;
611 if (m->in_conflict && m->type == Unchanged
612 && m->hi >= m->al) {
613 printrange(out, a, m->a+m->lo, m->hi-m->lo);
614 m++;
618 /* there is always some non-conflict after a conflict,
619 * unless we hit the end
621 if (m->type == End)
622 break;
624 if (do_trace) {
625 printf("<<%s: %d-%d,%d-%d,%d-%d%s(%d,%d)>>\n",
626 m->type==Unmatched?"Unmatched":
627 m->type==Unchanged?"Unchanged":
628 m->type==Extraneous?"Extraneous":
629 m->type==Changed?"Changed":
630 m->type==AlreadyApplied?"AlreadyApplied":
631 m->type==Conflict?"Conflict":"unknown",
632 m->a, m->a+m->al-1,
633 m->b, m->b+m->bl-1,
634 m->c, m->c+m->cl-1,
635 m->in_conflict ? " in_conflict" : "",
636 m->lo, m->hi);
639 switch (m->type) {
640 case Unchanged:
641 case AlreadyApplied:
642 case Unmatched:
643 printrange(out, a, m->a, m->al);
644 break;
645 case Extraneous:
646 break;
647 case Changed:
648 printrange(out, c, m->c, m->cl);
649 break;
650 case Conflict:
651 if (m->conflict_ignored) {
652 printrange(out, a, m->a, m->al);
653 break;
655 case End:
656 assert(0);
661 int save_merge(struct file a, struct file b, struct file c,
662 struct merge *merger, char *file, int backup)
664 char *replacename = xmalloc(strlen(file) + 20);
665 char *orignew = xmalloc(strlen(file) + 20);
666 int fd;
667 FILE *outfile;
668 int err = 0;
669 strcpy(replacename, file);
670 strcat(replacename, "XXXXXX");
671 strcpy(orignew, file);
672 strcat(orignew, ".porig");
674 fd = mkstemp(replacename);
675 if (fd < 0) {
676 err = 1;
677 goto out;
679 outfile = fdopen(fd, "w");
680 print_merge(outfile, &a, &b, &c, 0, merger);
681 fclose(outfile);
682 if (backup && rename(file, orignew) != 0)
683 err = -2;
684 else if (rename(replacename, file) != 0)
685 err = -2;
687 out:
688 free(replacename);
689 free(orignew);
690 return err;