Release 0.8
[wiggle/upstream.git] / merge2.c
blob9c605eaa3094d3b8d23d8c6a771b691ba0960e34
1 /*
2 * wiggle - apply rejected patches
4 * Copyright (C) 2005 Neil Brown <neilb@cse.unsw.edu.au>
5 * Copyright (C) 2010 Neil Brown <neilb@suse.de>
8 * This program is free software; you can redistribute it and/or modify
9 * it under the terms of the GNU General Public License as published by
10 * the Free Software Foundation; either version 2 of the License, or
11 * (at your option) any later version.
13 * This program is distributed in the hope that it will be useful,
14 * but WITHOUT ANY WARRANTY; without even the implied warranty of
15 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16 * GNU General Public License for more details.
18 * You should have received a copy of the GNU General Public License
19 * along with this program; if not, write to the Free Software
20 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
22 * Author: Neil Brown
23 * Email: <neilb@suse.de>
26 #include "wiggle.h"
27 #include <stdlib.h>
28 #include <malloc.h>
31 * Second attempt at merging....
33 * We want to create a mergelist which identifies 'orig' and 'after'
34 * sections (from a and c) and conflicts (which are ranges of a,b,c which
35 * all don't match).
36 * It is also helpful to differentiate 'orig' sections that aren't
37 * matched in 'b' with orig sections that are.
38 * To help with highlighting, it will be useful to know where
39 * the conflicts match the csl lists.
41 * This can all be achieved with a list of (a,b,c,c1,c1) 5-tuples.
42 * If two consecutive differ in more than one of a,b,c, it is a
43 * conflict.
44 * If only 'a' differ, it is un-matched original.
45 * If only 'b' differ, it is matched, unchanged original
46 * If only 'c' differ, it is 1
49 static inline int min(int a, int b)
51 return a<b?a:b;
53 static inline void assert(int a)
55 if (!a) abort();
58 int check_alreadyapplied(struct file af, struct file cf,
59 struct merge *m)
61 int i;
62 if (m->al != m->cl)
63 return 0;
64 for (i=0; i<m->al; i++) {
65 if (af.list[m->a+i].len != cf.list[m->c+i].len)
66 return 0;
67 if (strncmp(af.list[m->a+i].start,
68 cf.list[m->c+i].start,
69 af.list[m->a+i].len)!= 0)
70 return 0;
72 #if 0
73 printf("already applied %d,%d,%d - %d,%d,%d\n",
74 m->a,m->b,m->c,m->al,m->bl,m->cl);
75 printf(" %.10s - %.10s\n", af.list[m->a].start,
76 cf.list[m->c].start);
77 #endif
78 m->type = AlreadyApplied;
79 return 1;
82 inline int isolate_conflicts(struct file af, struct file bf, struct file cf,
83 struct csl *csl1, struct csl *csl2, int words,
84 struct merge *m)
86 /* A conflict indicates that something is definitely wrong
87 * and so we need to be a bit suspicious of nearby apparent matches.
88 * To display a conflict effectively we expands it's effect to
89 * include any Extraneous, Unmatched or Changed text.
90 * Also, unless 'words', we need to include any partial lines
91 * in the Unchanged text that forms the border of a conflict.
93 * A Changed text may also border a conflict, but it can
94 * only border one conflict (where as an Unchanged can border
95 * a preceeding and a following conflict).
96 * The 'new' section of a Changed text appears in the
97 * conflict as does any part of the original before
98 * a newline.
101 int i,j,k;
102 int cnt = 0;
104 for (i=0; m[i].type != End; i++)
105 if (m[i].type == Conflict) {
106 /* We have a conflict here.
107 * First search backwards for an Unchanged marking
108 * things as in_conflict. Then find the
109 * cut-point in the Unchanged. If there isn't one,
110 * keep looking.
112 * Then search forward doing the same thing.
114 cnt++;
115 m[i].in_conflict = 1;
116 j = i;
117 while (--j >= 0) {
118 if (!m[j].in_conflict) {
119 m[j].in_conflict = 1;
120 m[j].lo = 0;
121 } else if (m[j].type == Changed) {
122 /* This can no longer form a border */
123 m[j].lo = 0; m[j].hi = -1;
124 /* We merge these conflicts and stop searching */
125 cnt--;
126 break;
129 if (m[j].type == Unchanged || m[j].type == Changed) {
130 if (words) {
131 m[j].hi = m[j].al;
132 break;
134 /* need to find the last line-break, which
135 * might be after the last newline, if there
136 * is one, or might be at the start
138 for (k=m[j].al; k>0; k--)
139 if (ends_line(af.list[m[j].a+k-1]))
140 break;
141 if (k > 0)
142 m[j].hi = k;
143 else if ((m[j].a == 0 || ends_line(af.list[m[j].a-1])) &&
144 (m[j].b == 0 || ends_line(bf.list[m[j].b-1])) &&
145 (m[j].c == 0 || ends_line(cf.list[m[j].c-1])))
146 m[j].hi = 0;
147 else
148 /* no start-of-line found... */
149 m[j].hi = -1;
150 if (m[j].hi > 0 && m[j].type == Changed) {
151 /* this can only work if start is also a linke break */
152 if ((m[j].a == 0 || ends_line(af.list[m[j].a-1])) &&
153 (m[j].b == 0 || ends_line(bf.list[m[j].b-1])) &&
154 (m[j].c == 0 || ends_line(cf.list[m[j].c-1])))
155 /* ok */;
156 else
157 m[j].hi = -1;
159 if (m[j].hi >= 0)
160 break;
163 #if 0
164 if (j>=0 && m[j].in_conflict && m[j].type == Unchanged &&
165 m[j].hi == m[j].lo) {
166 /* nothing to separate conflicts, merge them */
167 m[j].lo = 0;
168 m[j].hi = -1;
169 cnt--;
171 #endif
172 /* now the forward search */
173 for (j=i+1; m[j].type != End; j++) {
174 m[j].in_conflict = 1;
175 if (m[j].type == Unchanged || m[j].type == Changed) {
176 m[j].hi = m[j].al;
177 if (words) {
178 m[j].lo = 0;
179 break;
181 /* need to find a line-break, which might be at
182 * the very beginning, or might be after the
183 * first newline - if there is one
185 if ((m[j].a == 0 || ends_line(af.list[m[j].a-1])) &&
186 (m[j].b == 0 || ends_line(bf.list[m[j].b-1])) &&
187 (m[j].c == 0 || ends_line(cf.list[m[j].c-1])))
188 m[j].lo = 0;
189 else {
190 for (k = 0 ; k < m[j].al ; k++)
191 if (ends_line(af.list[m[j].a+k]))
192 break;
193 if (k<m[j].al)
194 m[j].lo = k+1;
195 else
196 /* no start-of-line found */
197 m[j].lo = m[j].al+1;
199 if (m[j].lo <= m[j].al+1 && m[j].type == Changed) {
200 /* this can only work if the end is a line break */
201 if (ends_line(af.list[m[j].a+m[j].al-1]) &&
202 ends_line(bf.list[m[j].b+m[j].bl-1]) &&
203 ends_line(cf.list[m[j].c+m[j].cl-1]))
204 /* ok */;
205 else
206 m[j].lo = m[j].al+1;
208 if (m[j].lo < m[j].al+1)
209 break;
212 i = j - 1;
214 return cnt;
218 struct ci make_merger(struct file af, struct file bf, struct file cf,
219 struct csl *csl1, struct csl *csl2, int words,
220 int ignore_already)
222 /* find the wiggles and conflicts between csl1 and csl2
224 struct ci rv;
225 int i,l;
226 int a,b,c,c1,c2;
227 int wiggle_found = 0;
229 rv.conflicts = rv.wiggles = rv.ignored = 0;
231 for (i=0; csl1[i].len; i++);
232 l = i;
233 for (i=0; csl2[i].len; i++);
234 l += i;
235 /* maybe a bit of slack at each end */
236 l = l* 4 + 10;
238 rv.merger = malloc(sizeof(struct merge)*l);
239 if (!rv.merger)
240 return rv;
242 a=b=c=c1=c2 = 0;
243 i = 0;
244 while (1) {
245 int match1, match2;
246 match1 = (a>=csl1[c1].a && b >= csl1[c1].b); /* c1 doesn't match */
247 match2 = (b>=csl2[c2].a && c >= csl2[c2].b);
249 rv.merger[i].a = a;
250 rv.merger[i].b = b;
251 rv.merger[i].c = c;
252 rv.merger[i].c1 = c1;
253 rv.merger[i].c2 = c2;
254 rv.merger[i].in_conflict = 0;
256 if (!match1 && match2) {
257 if (a < csl1[c1].a) {
258 /* some unmatched text */
259 rv.merger[i].type = Unmatched;
260 rv.merger[i].al = csl1[c1].a - a;
261 rv.merger[i].bl = 0;
262 rv.merger[i].cl = 0;
263 wiggle_found ++;
264 } else {
265 int newb;
266 int j;
267 assert(b<csl1[c1].b);
268 /* some Extraneous text */
269 /* length is min of unmatched on left
270 * and matched on right
272 rv.merger[i].type = Extraneous;
273 rv.merger[i].al = 0;
274 rv.merger[i].cl =
275 rv.merger[i].bl =
276 min(csl1[c1].b - b,
277 csl2[c2].len - (b-csl2[c2].a));
278 newb = b+rv.merger[i].bl;
279 for (j=b; j<newb; j++) {
280 if (bf.list[j].start[0] == '\0') {
281 if (wiggle_found > 1)
282 rv.wiggles++;
283 wiggle_found = 0;
284 } else
285 wiggle_found ++;
288 } else if (match1 && !match2) {
289 /* some changed text
290 * if 'c' is currently at a suitable cut-point, then
291 * we can look for a triple-cut-point for start.
292 * Also, if csl2[c2].b isn't in a conflict, and is
293 * a suitable cut-point, then we could make a
294 * triple-cut-point for end of a conflict.
297 rv.merger[i].type = Changed;
298 rv.merger[i].bl = min(csl1[c1].b+csl1[c1].len, csl2[c2].a) - b;
299 rv.merger[i].al = rv.merger[i].bl;
300 rv.merger[i].cl = csl2[c2].b - c;
301 } else if (match1 && match2) {
302 /* Some unchanged text
304 rv.merger[i].type = Unchanged;
305 rv.merger[i].bl =
306 min(csl1[c1].len - (b-csl1[c1].b),
307 csl2[c2].len - (b-csl2[c2].a));
308 rv.merger[i].al = rv.merger[i].cl =
309 rv.merger[i].bl;
310 } else {
311 /* must be a conflict.
312 * Move a and c to next match, and b to closest of the two
314 rv.merger[i].type = Conflict;
315 rv.merger[i].al = csl1[c1].a - a;
316 rv.merger[i].cl = csl2[c2].b - c;
317 rv.merger[i].bl = min(csl1[c1].b, csl2[c2].a) - b;
318 if (ignore_already &&
319 check_alreadyapplied(af,cf,&rv.merger[i]))
320 rv.ignored ++;
322 a += rv.merger[i].al;
323 b += rv.merger[i].bl;
324 c += rv.merger[i].cl;
325 i++;
327 while (csl1[c1].a + csl1[c1].len <= a && csl1[c1].len) c1++;
328 assert(csl1[c1].b + csl1[c1].len >= b);
329 while (csl2[c2].b + csl2[c2].len <= c && csl2[c2].len) c2++;
330 assert(csl2[c2].a + csl2[c2].len >= b);
331 if (csl1[c1].len == 0 && csl2[c2].len == 0 &&
332 a == csl1[c1].a && b == csl1[c1].b &&
333 b == csl2[c2].a && c == csl2[c2].b)
334 break;
336 rv.merger[i].type = End;
337 rv.merger[i].in_conflict = 0;
338 rv.conflicts = isolate_conflicts(af,bf,cf,csl1,csl2,words,rv.merger);
339 if (wiggle_found)
340 rv.wiggles++;
341 return rv;
344 void printrange(FILE *out, struct file *f, int start, int len)
346 while (len> 0) {
347 printword(out, f->list[start]);
348 start++;
349 len--;
353 struct ci print_merge2(FILE *out, struct file *a, struct file *b, struct file *c,
354 struct csl *c1, struct csl *c2,
355 int words, int ignore_already)
357 struct ci rv = make_merger(*a, *b, *c, c1, c2,
358 words, ignore_already);
359 struct merge *m;
361 for (m=rv.merger; m->type != End ; m++) {
362 struct merge *cm;
363 #if 1
364 static int v= -1;
365 if (v == -1) {
366 if (getenv("WiggleVerbose"))
367 v = 1;
368 else
369 v = 0;
371 if (v)
372 printf("[%s: %d-%d,%d-%d,%d-%d%s(%d,%d)]\n",
373 m->type==Unmatched?"Unmatched":
374 m->type==Unchanged?"Unchanged":
375 m->type==Extraneous?"Extraneous":
376 m->type==Changed?"Changed":
377 m->type==AlreadyApplied?"AlreadyApplied":
378 m->type==Conflict?"Conflict":"unknown",
379 m->a, m->a+m->al-1,
380 m->b, m->b+m->bl-1,
381 m->c, m->c+m->cl-1, m->in_conflict?" in_conflict":"",
382 m->lo, m->hi);
383 #endif
384 while (m->in_conflict) {
385 /* need to print from 'hi' to 'lo' of next
386 * Unchanged which is < it's hi
388 int st = m->hi;
389 if (m->hi <= m->lo)
390 st = 0;
392 if (m->type == Unchanged)
393 printrange(out, a, m->a+m->lo, m->hi - m->lo);
395 #if 1
396 if (v)
397 for (cm=m; cm->in_conflict; cm++) {
398 printf("{%s: %d-%d,%d-%d,%d-%d%s(%d,%d)}\n",
399 cm->type==Unmatched?"Unmatched":
400 cm->type==Unchanged?"Unchanged":
401 cm->type==Extraneous?"Extraneous":
402 cm->type==Changed?"Changed":
403 cm->type==AlreadyApplied?"AlreadyApplied":
404 cm->type==Conflict?"Conflict":"unknown",
405 cm->a, cm->a+cm->al-1,
406 cm->b, cm->b+cm->bl-1,
407 cm->c, cm->c+cm->cl-1, cm->in_conflict?" in_conflict":"",
408 cm->lo, cm->hi);
409 if ((cm->type == Unchanged ||cm->type == Changed) && cm != m && cm->lo < cm->hi)
410 break;
412 #endif
414 fputs(words?"<<<---":"<<<<<<<\n", out);
415 for (cm=m; cm->in_conflict; cm++) {
416 if ((cm->type == Unchanged || cm->type == Changed) && cm != m && cm->lo < cm->hi) {
417 printrange(out, a, cm->a, cm->lo);
418 break;
420 printrange(out, a, cm->a+st, cm->al-st);
421 st = 0;
423 fputs(words?"|||":"|||||||\n", out);
424 st = m->hi;
425 for (cm=m; cm->in_conflict; cm++) {
426 if ((cm->type == Unchanged || cm->type == Changed) && cm != m && cm->lo < cm->hi) {
427 printrange(out, b, cm->b, cm->lo);
428 break;
430 printrange(out, b, cm->b+st, cm->bl-st);
431 st = 0;
433 fputs(words?"===":"=======\n", out);
434 st = m->hi;
435 for (cm=m; cm->in_conflict; cm++) {
436 if (cm->type == Unchanged && cm != m && cm->lo < cm->hi) {
437 printrange(out, c, cm->c, cm->lo);
438 break;
440 if (cm->type == Changed)
441 st = 0; /* All of result of change must be printed */
442 printrange(out, c, cm->c+st, cm->cl-st);
443 st = 0;
445 fputs(words?"--->>>":">>>>>>>\n", out);
446 m = cm;
447 if (m->in_conflict && m->hi >= m->al) {
448 assert(m->type == Unchanged);
449 printrange(out, a, m->a+m->lo, m->hi-m->lo);
450 m++;
454 /* there is always some non-conflict after a conflict,
455 * unless we hit the end
457 if (m->type == End)
458 break;
459 #if 1
460 if (v) {
461 printf("<<%s: %d-%d,%d-%d,%d-%d%s(%d,%d)>>\n",
462 m->type==Unmatched?"Unmatched":
463 m->type==Unchanged?"Unchanged":
464 m->type==Extraneous?"Extraneous":
465 m->type==Changed?"Changed":
466 m->type==AlreadyApplied?"AlreadyApplied":
467 m->type==Conflict?"Conflict":"unknown",
468 m->a, m->a+m->al-1,
469 m->b, m->b+m->bl-1,
470 m->c, m->c+m->cl-1, m->in_conflict?" in_conflict":"",
471 m->lo, m->hi);
473 #endif
474 switch(m->type) {
475 case Unchanged:
476 case AlreadyApplied:
477 case Unmatched:
478 printrange(out, a, m->a, m->al);
479 break;
480 case Extraneous:
481 break;
482 case Changed:
483 printrange(out, c, m->c, m->cl);
484 break;
485 case Conflict:
486 case End: assert(0);
489 return rv;