Browser: improve rendering of conflicts in the the 'merge' view.
[wiggle/upstream.git] / merge2.c
blob34080767022620da798f5696cb72fae78b51f94c
1 /*
2 * wiggle - apply rejected patches
4 * Copyright (C) 2005 Neil Brown <neilb@cse.unsw.edu.au>
5 * Copyright (C) 2010 Neil Brown <neilb@suse.de>
8 * This program is free software; you can redistribute it and/or modify
9 * it under the terms of the GNU General Public License as published by
10 * the Free Software Foundation; either version 2 of the License, or
11 * (at your option) any later version.
13 * This program is distributed in the hope that it will be useful,
14 * but WITHOUT ANY WARRANTY; without even the implied warranty of
15 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16 * GNU General Public License for more details.
18 * You should have received a copy of the GNU General Public License
19 * along with this program; if not, write to the Free Software Foundation, Inc.,
20 * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
22 * Author: Neil Brown
23 * Email: <neilb@suse.de>
26 #include "wiggle.h"
27 #include <stdlib.h>
30 * Second attempt at merging....
32 * We want to create a mergelist which identifies 'orig' and 'after'
33 * sections (from a and c) and conflicts (which are ranges of a,b,c which
34 * all don't match).
35 * It is also helpful to differentiate 'orig' sections that aren't
36 * matched in 'b' with orig sections that are.
37 * To help with highlighting, it will be useful to know where
38 * the conflicts match the csl lists.
40 * This can all be achieved with a list of (a,b,c,c1,c1) 5-tuples.
41 * If two consecutive differ in more than one of a,b,c, it is a
42 * conflict.
43 * If only 'a' differ, it is un-matched original.
44 * If only 'b' differ, it is matched, unchanged original
45 * If only 'c' differ, it is 1
48 static inline int min(int a, int b)
50 return a < b ? a : b;
52 static inline void assert(int a)
54 if (!a)
55 abort();
58 static int check_alreadyapplied(struct file af, struct file cf,
59 struct merge *m)
61 int i;
62 if (m->al != m->cl)
63 return 0;
64 for (i = 0; i < m->al; i++) {
65 if (af.list[m->a+i].len != cf.list[m->c+i].len)
66 return 0;
67 if (strncmp(af.list[m->a+i].start,
68 cf.list[m->c+i].start,
69 af.list[m->a+i].len) != 0)
70 return 0;
72 if (do_trace) {
73 printf("already applied %d,%d,%d - %d,%d,%d\n",
74 m->a, m->b, m->c, m->al, m->bl, m->cl);
75 printf(" %.10s - %.10s\n", af.list[m->a].start,
76 cf.list[m->c].start);
78 m->type = AlreadyApplied;
79 return 1;
82 /* A 'cut-point' is a location in the merger where it is reasonable
83 * the change the mode of display - between displaying the merger
84 * and displaying the separate streams.
85 * A 'conflict' can only be displayed as separate stream so when
86 * one is found, we need to find a preceeding and trailing cut-point
87 * and enlarge the conflict to that range.
88 * A suitable location is one where all three streams are at a line-end.
90 static int is_cutpoint(struct merge m,
91 struct file af, struct file bf, struct file cf)
93 return ((m.a == 0 || ends_line(af.list[m.a-1])) &&
94 (m.b == 0 || ends_line(bf.list[m.b-1])) &&
95 (m.c == 0 || ends_line(cf.list[m.c-1])));
98 static int isolate_conflicts(struct file af, struct file bf, struct file cf,
99 struct csl *csl1, struct csl *csl2, int words,
100 struct merge *m, int show_wiggles)
102 /* A conflict indicates that something is definitely wrong
103 * and so we need to be a bit suspicious of nearby apparent matches.
104 * To display a conflict effectively we expands it's effect to
105 * include any Extraneous, Unmatched, Changed or AlreadyApplied text.
106 * Also, unless 'words', we need to include any partial lines
107 * in the Unchanged text that forms the border of a conflict.
109 * A Changed text may also border a conflict, but it can
110 * only border one conflict (where as an Unchanged can border
111 * a preceeding and a following conflict).
112 * The 'new' section of a Changed text appears in the
113 * conflict as does any part of the original before
114 * a newline.
116 * If 'show_wiggles' is set we treat wiggles like conflicts.
117 * A 'wiggle' is implied by any Extraneous text being ignored,
118 * or any line that has both Changed and Unmatched content.
119 * (Unmatched content where nothing is changed is common and not
120 * really a 'wiggle').
122 * A hunk header is never considered part of a conflict. It
123 * thereby can serve as a separator between conflicts.
125 * We need to ensure there is adequate context for the conflict.
126 * So ensure there are at least 3 newlines in Extraneous or
127 * Unchanged on both sides of a Conflict - but don't go so far
128 * as including a hunk header.
129 * If there are 3, and they are all in 'Unchanged' sections, then
130 * that much context is not really needed - reduce it a bit.
132 int i, j, k;
133 int cnt = 0;
134 int changed = 0;
135 int unmatched = 0;
137 for (i = 0; m[i].type != End; i++) {
138 if (m[i].type == Changed)
139 changed = 1;
140 if (m[i].type == Unmatched)
141 unmatched = 1;
142 if (m[i].type == Conflict ||
143 (show_wiggles && ((changed && unmatched)
144 || m[i].type == Extraneous))) {
145 /* We have a conflict (or wiggle) here.
146 * First search backwards for an Unchanged marking
147 * things as in_conflict. Then find the
148 * cut-point in the Unchanged. If there isn't one,
149 * keep looking.
151 * Then search forward doing the same thing.
153 int newlines = 0;
154 cnt++;
155 m[i].in_conflict = 1;
156 j = i;
157 while (--j >= 0) {
158 if (m[j].type == Extraneous &&
159 bf.list[m[j].b].start[0] == '\0')
160 /* hunk header - not conflict any more */
161 break;
162 if (!m[j].in_conflict) {
163 m[j].in_conflict = 1;
164 m[j].lo = 0;
165 } else if (m[j].type == Changed) {
166 /* This can no longer form a border */
167 m[j].hi = -1;
168 /* We merge these conflicts and stop searching */
169 cnt--;
170 break;
172 if (m[j].type == Extraneous) {
173 for (k = m[j].bl; k > 0; k--)
174 if (ends_line(bf.list[m[j].b+k-1]))
175 newlines++;
178 if (m[j].type == Unchanged || m[j].type == Changed) {
179 /* If we find enough newlines in this section,
180 * then we only really need 1, but would rather
181 * it wasn't the first one. 'firstk' allows us
182 * to track which newline we actually use
184 int firstk = m[j].al+1;
185 if (words) {
186 m[j].hi = m[j].al;
187 break;
189 /* need to find the last line-break, which
190 * might be after the last newline, if there
191 * is one, or might be at the start
193 for (k = m[j].al; k > 0; k--)
194 if (ends_line(af.list[m[j].a+k-1])) {
195 if (firstk >= m[j].al)
196 firstk = k;
197 newlines++;
198 if (newlines >= 3) {
199 k = firstk;
200 break;
203 if (k > 0)
204 m[j].hi = k;
205 else if (is_cutpoint(m[j], af,bf,cf))
206 m[j].hi = 0;
207 else
208 /* no start-of-line found... */
209 m[j].hi = -1;
210 if (m[j].hi > 0 && m[j].type == Changed) {
211 /* this can only work if start is
212 * also a line break */
213 if (is_cutpoint(m[j], af,bf,cf))
214 /* ok */;
215 else
216 m[j].hi = -1;
218 if (m[j].hi >= 0)
219 break;
223 /* now the forward search */
224 newlines = 0;
225 for (j = i+1; m[j].type != End; j++) {
226 if (m[j].type == Extraneous &&
227 bf.list[m[j].b].start[0] == '\0')
228 /* hunk header - not conflict any more */
229 break;
230 m[j].in_conflict = 1;
231 if (m[j].type == Extraneous) {
232 for (k = 0; k < m[j].bl; k++)
233 if (ends_line(bf.list[m[j].b+k]))
234 newlines++;
236 if (m[j].type == Unchanged || m[j].type == Changed) {
237 m[j].hi = m[j].al;
238 if (words) {
239 m[j].lo = 0;
240 break;
242 /* need to find a line-break, which might be at
243 * the very beginning, or might be after the
244 * first newline - if there is one
246 if (is_cutpoint(m[j], af,bf,cf))
247 m[j].lo = 0;
248 else {
249 /* If we find enough newlines in this section,
250 * then we only really need 1, but would rather
251 * it wasn't the first one. 'firstk' allows us
252 * to track which newline we actually use
254 int firstk = -1;
255 for (k = 0 ; k < m[j].al ; k++)
256 if (ends_line(af.list[m[j].a+k])) {
257 if (firstk <= 0)
258 firstk = k;
259 newlines++;
260 if (newlines >= 3) {
261 k = firstk;
262 break;
265 if (firstk >= 0 &&
266 m[j+1].type == Unmatched) {
267 /* If this Unmatched exceeds 3 lines, just stop here */
268 int p;
269 int nl = 0;
270 for (p = 0; p < m[j+1].al ; p++)
271 if (ends_line(af.list[m[j+1].a+p])) {
272 nl++;
273 if (nl > 3)
274 break;
276 if (nl > 3)
277 k = firstk;
279 if (k < m[j].al)
280 m[j].lo = k+1;
281 else
282 /* no start-of-line found */
283 m[j].lo = m[j].al+1;
285 if (m[j].lo <= m[j].al+1 && m[j].type == Changed) {
286 /* this can only work if the end is a line break */
287 if (is_cutpoint(m[j+1], af,bf,cf))
288 /* ok */;
289 else
290 m[j].lo = m[j].al+1;
292 if (m[j].lo < m[j].al+1)
293 break;
296 i = j - 1;
298 if (m[i].al > 0 && ends_line(af.list[m[i].a+m[i].al-1])) {
299 unmatched = 0;
300 changed = 0;
303 return cnt;
306 struct ci make_merger(struct file af, struct file bf, struct file cf,
307 struct csl *csl1, struct csl *csl2, int words,
308 int ignore_already, int show_wiggles)
310 /* find the wiggles and conflicts between csl1 and csl2
312 struct ci rv;
313 int i, l;
314 int a, b, c, c1, c2;
315 int wiggle_found = 0;
317 rv.conflicts = rv.wiggles = rv.ignored = 0;
319 for (i = 0; csl1[i].len; i++)
321 l = i;
322 for (i = 0; csl2[i].len; i++)
324 l += i;
325 /* maybe a bit of slack at each end */
326 l = l * 4 + 10;
328 rv.merger = xmalloc(sizeof(struct merge)*l);
330 a = b = c = c1 = c2 = 0;
331 i = 0;
332 while (1) {
333 int match1, match2;
334 match1 = (a >= csl1[c1].a && b >= csl1[c1].b); /* c1 doesn't match */
335 match2 = (b >= csl2[c2].a && c >= csl2[c2].b);
337 rv.merger[i].a = a;
338 rv.merger[i].b = b;
339 rv.merger[i].c = c;
340 rv.merger[i].c1 = c1;
341 rv.merger[i].c2 = c2;
342 rv.merger[i].in_conflict = 0;
344 if (!match1 && match2) {
345 /* This is either Unmatched or Extraneous - probably both.
346 * If the match2 is a hunk-header Extraneous, it must
347 * align with an end-of-line in 'a', so adjust endpoint
349 int newa = csl1[c1].a;
350 if (b < bf.elcnt && bf.list[b].start
351 && bf.list[b].start[0] == '\0') {
352 while (newa > a &&
353 !ends_line(af.list[newa-1]))
354 newa--;
355 while (newa < af.elcnt && !(newa == 0 || ends_line(af.list[newa-1])))
356 newa++;
358 if (a < newa) {
359 /* some unmatched text */
360 rv.merger[i].type = Unmatched;
361 rv.merger[i].al = newa - a;
362 rv.merger[i].bl = 0;
363 rv.merger[i].cl = 0;
364 wiggle_found++;
365 } else {
366 int newb;
367 int j;
368 assert(b < csl1[c1].b);
369 /* some Extraneous text */
370 /* length is min of unmatched on left
371 * and matched on right.
372 * However a hunk-header must be an
373 * Extraneous section by itself, so if this
374 * start with one, the length is 1, and if
375 * there is one in the middle, only take the
376 * text up to there for now.
378 rv.merger[i].type = Extraneous;
379 rv.merger[i].al = 0;
380 newb = b +
381 min(csl1[c1].b - b,
382 csl2[c2].len - (b-csl2[c2].a));
383 if (bf.list[b].start[0] == '\0')
384 newb = b + 1;
385 for (j = b; j < newb; j++) {
386 if (bf.list[j].start[0] == '\0') {
387 if (wiggle_found > 1)
388 rv.wiggles++;
389 wiggle_found = 0;
390 if (j > b)
391 newb = j;
392 } else
393 wiggle_found++;
395 rv.merger[i].cl =
396 rv.merger[i].bl = newb - b;
398 } else if (match1 && !match2) {
399 /* some changed text
400 * if 'c' is currently at a suitable cut-point, then
401 * we can look for a triple-cut-point for start.
402 * Also, if csl2[c2].b isn't in a conflict, and is
403 * a suitable cut-point, then we could make a
404 * triple-cut-point for end of a conflict.
407 rv.merger[i].type = Changed;
408 rv.merger[i].bl = min(csl1[c1].b+csl1[c1].len, csl2[c2].a) - b;
409 rv.merger[i].al = rv.merger[i].bl;
410 rv.merger[i].cl = csl2[c2].b - c;
411 } else if (match1 && match2) {
412 /* Some unchanged text
414 rv.merger[i].type = Unchanged;
415 rv.merger[i].bl =
416 min(csl1[c1].len - (b-csl1[c1].b),
417 csl2[c2].len - (b-csl2[c2].a));
418 rv.merger[i].al = rv.merger[i].cl =
419 rv.merger[i].bl;
420 } else {
421 /* must be a conflict.
422 * Move a and c to next match, and b to closest of the two
424 rv.merger[i].type = Conflict;
425 rv.merger[i].al = csl1[c1].a - a;
426 rv.merger[i].cl = csl2[c2].b - c;
427 rv.merger[i].bl = min(csl1[c1].b, csl2[c2].a) - b;
428 if (ignore_already &&
429 check_alreadyapplied(af, cf, &rv.merger[i]))
430 rv.ignored++;
432 a += rv.merger[i].al;
433 b += rv.merger[i].bl;
434 c += rv.merger[i].cl;
435 i++;
437 while (csl1[c1].a + csl1[c1].len <= a && csl1[c1].len)
438 c1++;
439 assert(csl1[c1].b + csl1[c1].len >= b);
440 while (csl2[c2].b + csl2[c2].len <= c && csl2[c2].len)
441 c2++;
442 assert(csl2[c2].a + csl2[c2].len >= b);
443 if (csl1[c1].len == 0 && csl2[c2].len == 0 &&
444 a == csl1[c1].a && b == csl1[c1].b &&
445 b == csl2[c2].a && c == csl2[c2].b)
446 break;
448 rv.merger[i].type = End;
449 rv.merger[i].a = a;
450 rv.merger[i].b = b;
451 rv.merger[i].c = c;
452 rv.merger[i].c1 = c1;
453 rv.merger[i].c2 = c2;
454 rv.merger[i].in_conflict = 0;
455 assert(i < l);
456 rv.conflicts = isolate_conflicts(af, bf, cf, csl1, csl2, words,
457 rv.merger, show_wiggles);
458 if (wiggle_found)
459 rv.wiggles++;
460 return rv;
463 static void printrange(FILE *out, struct file *f, int start, int len)
465 while (len > 0) {
466 printword(out, f->list[start]);
467 start++;
468 len--;
472 struct ci print_merge2(FILE *out, struct file *a, struct file *b, struct file *c,
473 struct csl *c1, struct csl *c2,
474 int words, int ignore_already, int show_wiggles)
476 struct ci rv = make_merger(*a, *b, *c, c1, c2,
477 words, ignore_already, show_wiggles);
478 struct merge *m;
480 for (m = rv.merger; m->type != End ; m++) {
481 struct merge *cm;
482 if (do_trace)
483 printf("[%s: %d-%d,%d-%d,%d-%d%s(%d,%d)]\n",
484 m->type==Unmatched ? "Unmatched" :
485 m->type==Unchanged ? "Unchanged" :
486 m->type==Extraneous ? "Extraneous" :
487 m->type==Changed ? "Changed" :
488 m->type==AlreadyApplied ? "AlreadyApplied" :
489 m->type==Conflict ? "Conflict":"unknown",
490 m->a, m->a+m->al-1,
491 m->b, m->b+m->bl-1,
492 m->c, m->c+m->cl-1,
493 m->in_conflict ? " in_conflict" : "",
494 m->lo, m->hi);
496 while (m->in_conflict) {
497 /* need to print from 'hi' to 'lo' of next
498 * Unchanged which is < it's hi
500 int found_conflict = 0;
501 int st = 0, st1;
502 if (m->type == Unchanged || m->type == Changed)
503 if (m->hi >= m->lo)
504 st = m->hi;
505 st1 = st;
507 if (m->type == Unchanged)
508 printrange(out, a, m->a+m->lo, m->hi - m->lo);
510 if (do_trace)
511 for (cm = m; cm->in_conflict; cm++) {
512 printf("{%s: %d-%d,%d-%d,%d-%d%s(%d,%d)}\n",
513 cm->type==Unmatched?"Unmatched":
514 cm->type==Unchanged?"Unchanged":
515 cm->type==Extraneous?"Extraneous":
516 cm->type==Changed?"Changed":
517 cm->type==AlreadyApplied?"AlreadyApplied":
518 cm->type==Conflict?"Conflict":"unknown",
519 cm->a, cm->a+cm->al-1,
520 cm->b, cm->b+cm->bl-1,
521 cm->c, cm->c+cm->cl-1,
522 cm->in_conflict ? " in_conflict" : "",
523 cm->lo, cm->hi);
524 if ((cm->type == Unchanged || cm->type == Changed)
525 && cm != m && cm->lo < cm->hi)
526 break;
529 fputs(words ? "<<<---" : "<<<<<<<\n", out);
530 for (cm = m; cm->in_conflict; cm++) {
531 if (cm->type == Conflict)
532 found_conflict = 1;
533 if ((cm->type == Unchanged || cm->type == Changed)
534 && cm != m && cm->lo < cm->hi) {
535 printrange(out, a, cm->a, cm->lo);
536 break;
538 printrange(out, a, cm->a+st1, cm->al-st1);
539 st1 = 0;
541 fputs(words ? "|||" : "|||||||\n", out);
542 st1 = st;
543 for (cm = m; cm->in_conflict; cm++) {
544 if ((cm->type == Unchanged || cm->type == Changed)
545 && cm != m && cm->lo < cm->hi) {
546 printrange(out, b, cm->b, cm->lo);
547 break;
549 printrange(out, b, cm->b+st1, cm->bl-st1);
550 st1 = 0;
552 fputs(words ? "===" : "=======\n", out);
553 st1 = st;
554 for (cm = m; cm->in_conflict; cm++) {
555 if (cm->type == Unchanged &&
556 cm != m && cm->lo < cm->hi) {
557 printrange(out, c, cm->c, cm->lo);
558 break;
560 if (cm->type == Changed)
561 st1 = 0; /* All of result of change must be printed */
562 printrange(out, c, cm->c+st1, cm->cl-st1);
563 st1 = 0;
565 if (!found_conflict) {
566 /* This section was wiggled in successfully,
567 * but full conflict display was requested.
568 * So now print out the wiggled result as well.
570 fputs(words ? "&&&" : "&&&&&&&\n", out);
571 st1 = st;
572 for (cm = m; cm->in_conflict; cm++) {
573 int last = 0;
574 if ((cm->type == Unchanged || cm->type == Changed)
575 && cm != m && cm->lo < cm->hi)
576 last = 1;
577 switch (cm->type) {
578 case Unchanged:
579 case AlreadyApplied:
580 case Unmatched:
581 printrange(out, a, cm->a+st1,
582 last ? cm->lo : cm->al-st1);
583 break;
584 case Extraneous:
585 break;
586 case Changed:
587 printrange(out, c, cm->c+st1,
588 last ? cm->lo : cm->cl-st1);
589 break;
590 case Conflict:
591 case End:
592 assert(0);
594 if (last)
595 break;
596 st1 = 0;
599 fputs(words ? "--->>>" : ">>>>>>>\n", out);
600 m = cm;
601 if (m->in_conflict && m->type == Unchanged
602 && m->hi >= m->al) {
603 printrange(out, a, m->a+m->lo, m->hi-m->lo);
604 m++;
608 /* there is always some non-conflict after a conflict,
609 * unless we hit the end
611 if (m->type == End)
612 break;
614 if (do_trace) {
615 printf("<<%s: %d-%d,%d-%d,%d-%d%s(%d,%d)>>\n",
616 m->type==Unmatched?"Unmatched":
617 m->type==Unchanged?"Unchanged":
618 m->type==Extraneous?"Extraneous":
619 m->type==Changed?"Changed":
620 m->type==AlreadyApplied?"AlreadyApplied":
621 m->type==Conflict?"Conflict":"unknown",
622 m->a, m->a+m->al-1,
623 m->b, m->b+m->bl-1,
624 m->c, m->c+m->cl-1,
625 m->in_conflict ? " in_conflict" : "",
626 m->lo, m->hi);
629 switch (m->type) {
630 case Unchanged:
631 case AlreadyApplied:
632 case Unmatched:
633 printrange(out, a, m->a, m->al);
634 break;
635 case Extraneous:
636 break;
637 case Changed:
638 printrange(out, c, m->c, m->cl);
639 break;
640 case Conflict:
641 case End:
642 assert(0);
645 return rv;