Add --no-ignore option
[wiggle/upstream.git] / merge2.c
blob592157c1b9259f751cd2fd01cbd7a610eda07904
1 /*
2 * wiggle - apply rejected patches
4 * Copyright (C) 2005 Neil Brown <neilb@cse.unsw.edu.au>
7 * This program is free software; you can redistribute it and/or modify
8 * it under the terms of the GNU General Public License as published by
9 * the Free Software Foundation; either version 2 of the License, or
10 * (at your option) any later version.
12 * This program is distributed in the hope that it will be useful,
13 * but WITHOUT ANY WARRANTY; without even the implied warranty of
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 * GNU General Public License for more details.
17 * You should have received a copy of the GNU General Public License
18 * along with this program; if not, write to the Free Software
19 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
21 * Author: Neil Brown
22 * Email: <neilb@cse.unsw.edu.au>
23 * Paper: Neil Brown
24 * School of Computer Science and Engineering
25 * The University of New South Wales
26 * Sydney, 2052
27 * Australia
30 #include "wiggle.h"
31 #include <stdlib.h>
32 #include <malloc.h>
35 * Second attempt at merging....
37 * We want to create a mergelist which identifies 'orig' and 'after'
38 * sections (from a and c) and conflicts (which are ranges of a,b,c which
39 * all don't match).
40 * It is also helpful to differentiate 'orig' sections that aren't
41 * matched in 'b' with orig sections that are.
42 * To help with highlighting, it will be useful to know where
43 * the conflicts match the csl lists.
45 * This can all be achieved with a list of (a,b,c,c1,c1) 5-tuples.
46 * If two consecutive differ in more than one of a,b,c, it is a
47 * conflict.
48 * If only 'a' differ, it is un-matched original.
49 * If only 'b' differ, it is matched, unchanged original
50 * If only 'c' differ, it is 1
53 static inline int min(int a, int b)
55 return a<b?a:b;
57 static inline void assert(int a)
59 if (!a) abort();
62 int check_alreadyapplied(struct file af, struct file cf,
63 struct merge *m)
65 int i;
66 if (m->al != m->cl)
67 return 0;
68 for (i=0; i<m->al; i++) {
69 if (af.list[m->a+i].len != cf.list[m->c+i].len)
70 return 0;
71 if (strncmp(af.list[m->a+i].start,
72 cf.list[m->c+i].start,
73 af.list[m->a+i].len)!= 0)
74 return 0;
76 #if 0
77 printf("already applied %d,%d,%d - %d,%d,%d\n",
78 m->a,m->b,m->c,m->al,m->bl,m->cl);
79 printf(" %.10s - %.10s\n", af.list[m->a].start,
80 cf.list[m->c].start);
81 #endif
82 m->type = AlreadyApplied;
83 return 1;
86 inline int isolate_conflicts(struct file af, struct file bf, struct file cf,
87 struct csl *csl1, struct csl *csl2, int words,
88 struct merge *m)
90 /* A conflict indicates that something is definitely wrong
91 * and so we need to be a bit suspicious of nearby apparent matches.
92 * To display a conflict effectively we expands it's effect to
93 * include any Extraneous, Unmatched or Changed text.
94 * Also, unless 'words', we need to include any partial lines
95 * in the Unchanged text that forms the border of a conflict.
97 * A Changed text may also border a conflict, but it can
98 * only border one conflict (where as an Unchanged can border
99 * a preceeding and a following conflict).
100 * The 'new' section of a Changed text appears in the
101 * conflict as does any part of the original before
102 * a newline.
105 int i,j,k;
106 int cnt = 0;
108 for (i=0; m[i].type != End; i++)
109 if (m[i].type == Conflict) {
110 /* We have a conflict here.
111 * First search backwards for an Unchanged marking
112 * things as in_conflict. Then find the
113 * cut-point in the Unchanged. If there isn't one,
114 * keep looking.
116 * Then search forward doing the same thing.
118 cnt++;
119 m[i].in_conflict = 1;
120 j = i;
121 while (--j >= 0) {
122 if (!m[j].in_conflict) {
123 m[j].in_conflict = 1;
124 m[j].lo = 0;
125 } else if (m[j].type == Changed) {
126 /* This can no longer form a border */
127 m[j].lo = 0; m[j].hi = -1;
128 /* We merge these conflicts and stop searching */
129 cnt--;
130 break;
133 if (m[j].type == Unchanged || m[j].type == Changed) {
134 if (words) {
135 m[j].hi = m[j].al;
136 break;
138 /* need to find the last line-break, which
139 * might be after the last newline, if there
140 * is one, or might be at the start
142 for (k=m[j].al; k>0; k--)
143 if (ends_line(af.list[m[j].a+k-1]))
144 break;
145 if (k > 0)
146 m[j].hi = k;
147 else if ((m[j].a == 0 || ends_line(af.list[m[j].a-1])) &&
148 (m[j].b == 0 || ends_line(bf.list[m[j].b-1])) &&
149 (m[j].c == 0 || ends_line(cf.list[m[j].c-1])))
150 m[j].hi = 0;
151 else
152 /* no start-of-line found... */
153 m[j].hi = -1;
154 if (m[j].hi > 0 && m[j].type == Changed) {
155 /* this can only work if start is also a linke break */
156 if ((m[j].a == 0 || ends_line(af.list[m[j].a-1])) &&
157 (m[j].b == 0 || ends_line(bf.list[m[j].b-1])) &&
158 (m[j].c == 0 || ends_line(cf.list[m[j].c-1])))
159 /* ok */;
160 else
161 m[j].hi = -1;
163 if (m[j].hi >= 0)
164 break;
167 #if 0
168 if (j>=0 && m[j].in_conflict && m[j].type == Unchanged &&
169 m[j].hi == m[j].lo) {
170 /* nothing to separate conflicts, merge them */
171 m[j].lo = 0;
172 m[j].hi = -1;
173 cnt--;
175 #endif
176 /* now the forward search */
177 for (j=i+1; m[j].type != End; j++) {
178 m[j].in_conflict = 1;
179 if (m[j].type == Unchanged || m[j].type == Changed) {
180 m[j].hi = m[j].al;
181 if (words) {
182 m[j].lo = 0;
183 break;
185 /* need to find a line-break, which might be at
186 * the very beginning, or might be after the
187 * first newline - if there is one
189 if ((m[j].a == 0 || ends_line(af.list[m[j].a-1])) &&
190 (m[j].b == 0 || ends_line(bf.list[m[j].b-1])) &&
191 (m[j].c == 0 || ends_line(cf.list[m[j].c-1])))
192 m[j].lo = 0;
193 else {
194 for (k = 0 ; k < m[j].al ; k++)
195 if (ends_line(af.list[m[j].a+k]))
196 break;
197 if (k<m[j].al)
198 m[j].lo = k+1;
199 else
200 /* no start-of-line found */
201 m[j].lo = m[j].al+1;
203 if (m[j].lo <= m[j].al+1 && m[j].type == Changed) {
204 /* this can only work if the end is a line break */
205 if (ends_line(af.list[m[j].a+m[j].al-1]) &&
206 ends_line(bf.list[m[j].b+m[j].bl-1]) &&
207 ends_line(cf.list[m[j].c+m[j].cl-1]))
208 /* ok */;
209 else
210 m[j].lo = m[j].al+1;
212 if (m[j].lo < m[j].al+1)
213 break;
216 i = j - 1;
218 return cnt;
222 struct ci make_merger(struct file af, struct file bf, struct file cf,
223 struct csl *csl1, struct csl *csl2, int words,
224 int ignore_already)
226 /* find the wiggles and conflicts between csl1 and csl2
228 struct ci rv;
229 int i,l;
230 int a,b,c,c1,c2;
231 int wiggle_found = 0;
233 rv.conflicts = rv.wiggles = rv.ignored = 0;
235 for (i=0; csl1[i].len; i++);
236 l = i;
237 for (i=0; csl2[i].len; i++);
238 l += i;
239 /* maybe a bit of slack at each end */
240 l = l* 4 + 10;
242 rv.merger = malloc(sizeof(struct merge)*l);
243 if (!rv.merger)
244 return rv;
246 a=b=c=c1=c2 = 0;
247 i = 0;
248 while (1) {
249 int match1, match2;
250 match1 = (a>=csl1[c1].a && b >= csl1[c1].b); /* c1 doesn't match */
251 match2 = (b>=csl2[c2].a && c >= csl2[c2].b);
253 rv.merger[i].a = a;
254 rv.merger[i].b = b;
255 rv.merger[i].c = c;
256 rv.merger[i].c1 = c1;
257 rv.merger[i].c2 = c2;
258 rv.merger[i].in_conflict = 0;
260 if (!match1 && match2) {
261 if (a < csl1[c1].a) {
262 /* some unmatched text */
263 rv.merger[i].type = Unmatched;
264 rv.merger[i].al = csl1[c1].a - a;
265 rv.merger[i].bl = 0;
266 rv.merger[i].cl = 0;
267 wiggle_found ++;
268 } else {
269 int newb;
270 int j;
271 assert(b<csl1[c1].b);
272 /* some Extraneous text */
273 /* length is min of unmatched on left
274 * and matched on right
276 rv.merger[i].type = Extraneous;
277 rv.merger[i].al = 0;
278 rv.merger[i].cl =
279 rv.merger[i].bl =
280 min(csl1[c1].b - b,
281 csl2[c2].len - (b-csl2[c2].a));
282 newb = b+rv.merger[i].bl;
283 for (j=b; j<newb; j++) {
284 if (bf.list[j].start[0] == '\0') {
285 if (wiggle_found > 1)
286 rv.wiggles++;
287 wiggle_found = 0;
288 } else
289 wiggle_found ++;
292 } else if (match1 && !match2) {
293 /* some changed text
294 * if 'c' is currently at a suitable cut-point, then
295 * we can look for a triple-cut-point for start.
296 * Also, if csl2[c2].b isn't in a conflict, and is
297 * a suitable cut-point, then we could make a
298 * triple-cut-point for end of a conflict.
301 rv.merger[i].type = Changed;
302 rv.merger[i].bl = min(csl1[c1].b+csl1[c1].len, csl2[c2].a) - b;
303 rv.merger[i].al = rv.merger[i].bl;
304 rv.merger[i].cl = csl2[c2].b - c;
305 } else if (match1 && match2) {
306 /* Some unchanged text
308 rv.merger[i].type = Unchanged;
309 rv.merger[i].bl =
310 min(csl1[c1].len - (b-csl1[c1].b),
311 csl2[c2].len - (b-csl2[c2].a));
312 rv.merger[i].al = rv.merger[i].cl =
313 rv.merger[i].bl;
314 } else {
315 /* must be a conflict.
316 * Move a and c to next match, and b to closest of the two
318 rv.merger[i].type = Conflict;
319 rv.merger[i].al = csl1[c1].a - a;
320 rv.merger[i].cl = csl2[c2].b - c;
321 rv.merger[i].bl = min(csl1[c1].b, csl2[c2].a) - b;
322 if (ignore_already &&
323 check_alreadyapplied(af,cf,&rv.merger[i]))
324 rv.ignored ++;
326 a += rv.merger[i].al;
327 b += rv.merger[i].bl;
328 c += rv.merger[i].cl;
329 i++;
331 while (csl1[c1].a + csl1[c1].len <= a && csl1[c1].len) c1++;
332 assert(csl1[c1].b + csl1[c1].len >= b);
333 while (csl2[c2].b + csl2[c2].len <= c && csl2[c2].len) c2++;
334 assert(csl2[c2].a + csl2[c2].len >= b);
335 if (csl1[c1].len == 0 && csl2[c2].len == 0 &&
336 a == csl1[c1].a && b == csl1[c1].b &&
337 b == csl2[c2].a && c == csl2[c2].b)
338 break;
340 rv.merger[i].type = End;
341 rv.merger[i].in_conflict = 0;
342 rv.conflicts = isolate_conflicts(af,bf,cf,csl1,csl2,words,rv.merger);
343 if (wiggle_found)
344 rv.wiggles++;
345 return rv;
348 void printrange(FILE *out, struct file *f, int start, int len)
350 while (len> 0) {
351 printword(out, f->list[start]);
352 start++;
353 len--;
357 struct ci print_merge2(FILE *out, struct file *a, struct file *b, struct file *c,
358 struct csl *c1, struct csl *c2,
359 int words, int ignore_already)
361 struct ci rv = make_merger(*a, *b, *c, c1, c2,
362 words, ignore_already);
363 struct merge *m;
365 for (m=rv.merger; m->type != End ; m++) {
366 struct merge *cm;
367 #if 1
368 static int v= -1;
369 if (v == -1) {
370 if (getenv("WiggleVerbose"))
371 v = 1;
372 else
373 v = 0;
375 if (v)
376 printf("[%s: %d-%d,%d-%d,%d-%d%s(%d,%d)]\n",
377 m->type==Unmatched?"Unmatched":
378 m->type==Unchanged?"Unchanged":
379 m->type==Extraneous?"Extraneous":
380 m->type==Changed?"Changed":
381 m->type==AlreadyApplied?"AlreadyApplied":
382 m->type==Conflict?"Conflict":"unknown",
383 m->a, m->a+m->al-1,
384 m->b, m->b+m->bl-1,
385 m->c, m->c+m->cl-1, m->in_conflict?" in_conflict":"",
386 m->lo, m->hi);
387 #endif
388 while (m->in_conflict) {
389 /* need to print from 'hi' to 'lo' of next
390 * Unchanged which is < it's hi
392 int st = m->hi;
393 if (m->hi <= m->lo)
394 st = 0;
396 if (m->type == Unchanged)
397 printrange(out, a, m->a+m->lo, m->hi - m->lo);
399 #if 1
400 if (v)
401 for (cm=m; cm->in_conflict; cm++) {
402 printf("{%s: %d-%d,%d-%d,%d-%d%s(%d,%d)}\n",
403 cm->type==Unmatched?"Unmatched":
404 cm->type==Unchanged?"Unchanged":
405 cm->type==Extraneous?"Extraneous":
406 cm->type==Changed?"Changed":
407 cm->type==AlreadyApplied?"AlreadyApplied":
408 cm->type==Conflict?"Conflict":"unknown",
409 cm->a, cm->a+cm->al-1,
410 cm->b, cm->b+cm->bl-1,
411 cm->c, cm->c+cm->cl-1, cm->in_conflict?" in_conflict":"",
412 cm->lo, cm->hi);
413 if ((cm->type == Unchanged ||cm->type == Changed) && cm != m && cm->lo < cm->hi)
414 break;
416 #endif
418 fputs(words?"<<<---":"<<<<<<<\n", out);
419 for (cm=m; cm->in_conflict; cm++) {
420 if ((cm->type == Unchanged || cm->type == Changed) && cm != m && cm->lo < cm->hi) {
421 printrange(out, a, cm->a, cm->lo);
422 break;
424 printrange(out, a, cm->a+st, cm->al-st);
425 st = 0;
427 fputs(words?"|||":"|||||||\n", out);
428 st = m->hi;
429 for (cm=m; cm->in_conflict; cm++) {
430 if ((cm->type == Unchanged || cm->type == Changed) && cm != m && cm->lo < cm->hi) {
431 printrange(out, b, cm->b, cm->lo);
432 break;
434 printrange(out, b, cm->b+st, cm->bl-st);
435 st = 0;
437 fputs(words?"===":"=======\n", out);
438 st = m->hi;
439 for (cm=m; cm->in_conflict; cm++) {
440 if (cm->type == Unchanged && cm != m && cm->lo < cm->hi) {
441 printrange(out, c, cm->c, cm->lo);
442 break;
444 if (cm->type == Changed)
445 st = 0; /* All of result of change must be printed */
446 printrange(out, c, cm->c+st, cm->cl-st);
447 st = 0;
449 fputs(words?"--->>>":">>>>>>>\n", out);
450 m = cm;
451 if (m->in_conflict && m->hi >= m->al) {
452 assert(m->type == Unchanged);
453 printrange(out, a, m->a+m->lo, m->hi-m->lo);
454 m++;
458 /* there is always some non-conflict after a conflict,
459 * unless we hit the end
461 if (m->type == End)
462 break;
463 #if 1
464 if (v) {
465 printf("<<%s: %d-%d,%d-%d,%d-%d%s(%d,%d)>>\n",
466 m->type==Unmatched?"Unmatched":
467 m->type==Unchanged?"Unchanged":
468 m->type==Extraneous?"Extraneous":
469 m->type==Changed?"Changed":
470 m->type==AlreadyApplied?"AlreadyApplied":
471 m->type==Conflict?"Conflict":"unknown",
472 m->a, m->a+m->al-1,
473 m->b, m->b+m->bl-1,
474 m->c, m->c+m->cl-1, m->in_conflict?" in_conflict":"",
475 m->lo, m->hi);
477 #endif
478 switch(m->type) {
479 case Unchanged:
480 case AlreadyApplied:
481 case Unmatched:
482 printrange(out, a, m->a, m->al);
483 break;
484 case Extraneous:
485 break;
486 case Changed:
487 printrange(out, c, m->c, m->cl);
488 break;
489 case Conflict:
490 case End: assert(0);
493 return rv;