browse: allow selective listing of only files with wiggles or with conflicts.
[wiggle/upstream.git] / diff.c
blob8b9ffed5628c8969732e6bda01c77254fd28c41c
1 /*
2 * wiggle - apply rejected patches
4 * Copyright (C) 2003 Neil Brown <neilb@cse.unsw.edu.au>
7 * This program is free software; you can redistribute it and/or modify
8 * it under the terms of the GNU General Public License as published by
9 * the Free Software Foundation; either version 2 of the License, or
10 * (at your option) any later version.
12 * This program is distributed in the hope that it will be useful,
13 * but WITHOUT ANY WARRANTY; without even the implied warranty of
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 * GNU General Public License for more details.
17 * You should have received a copy of the GNU General Public License
18 * along with this program; if not, write to the Free Software
19 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
21 * Author: Neil Brown
22 * Email: <neilb@suse.de>
26 * calculate longest common sequence between two sequences
28 * Each sequence contains strings with
29 * hash start length
30 * We produce a list of tripples: a b len
31 * where A and B point to elements in the two sequences, and len is the number
32 * of common elements there
34 * To help keep matches close together (important when matching a changed fragment
35 * against a whole) we track the disagonal of the first and last match on any path.
36 * When choosing the best of two paths, we choose the furthest reaching unless
37 * the other has a match and it's absolute diagonal difference is significantly smaller.
38 * 'Significant' if the reduction in difference exceeds the loss of progress by a
39 * factor of 2.
43 #include <malloc.h>
44 #include "wiggle.h"
45 #include <string.h>
46 #include <stdlib.h>
49 struct v {
50 int x; /* x location of furthest reaching path of current cost */
51 int md; /* diagonal location of midline crossing */
52 int l; /* number of continuous common sequences found so far */
56 static int find_common(struct file *a, struct file *b,
57 int *alop, int *ahip,
58 int *blop, int *bhip,
59 int mid,
60 struct v *v)
62 /* examine matrix from alo to ahi and blo to bhi
63 * finding longest subsequence.
64 * return new {a,b}{lo,hi} either side of midline.
65 * i.e. alo+blo <= mid <= ahi+bhi
66 * and alo,blo to ahi,bhi is a common (possibly empty) subseq
68 * v is scratch space each is indexable from
69 * alo-bhi to ahi-blo inclusive
72 int klo, khi;
73 int k;
74 int alo = *alop;
75 int ahi = *ahip;
76 int blo = *blop;
77 int bhi = *bhip;
78 int x,y;
80 int best = (ahi-alo)+(bhi-blo);
81 int dist;
83 klo = khi = alo-blo;
84 v[klo].x = alo;
85 v[klo].l = 0;
87 while(1) {
89 for (k=klo ; k <= khi ; k+= 2) {
90 int snake = 0;
91 struct v vnew = v[k];
92 x = v[k].x;
93 y = x-k;
94 if (y > bhi) abort();
95 while (x < ahi && y < bhi &&
96 match(&a->list[x], &b->list[y])
97 ) {
98 x++;
99 y++;
100 snake=1;
102 vnew.x = x;
103 vnew.l += snake;
104 dist = (ahi-x)+(bhi-y);
105 if (dist < best) best = dist;
106 if (x+y >= mid &&
107 v[k].x+v[k].x-k <= mid) {
108 vnew.md = k;
110 v[k] = vnew;
112 if (dist == 0) {
113 /* OK! We have arrived.
114 * We crossed the midpoint at or after v[k].xm,v[k].ym
116 if (x != ahi) abort();
117 x = (v[k].md+mid)/2;
118 y = x-v[k].md;
119 *alop = x;
120 *blop = y;
122 while (x < ahi && y < bhi &&
123 match(&a->list[x], &b->list[y])
125 x++;
126 y++;
129 *ahip = x;
130 *bhip = y;
132 return k;
136 for (k=klo+1; k <= khi-1 ; k+= 2) {
137 if (v[k-1].x+1 >= v[k+1].x ) {
138 v[k] = v[k-1];
139 v[k].x++;
140 } else {
141 v[k] = v[k+1];
145 x = v[klo].x; y = x -(klo-1);
146 dist = abs((ahi-x)-(bhi-y));
147 if (dist <= best) {
148 v[klo-1] = v[klo];
149 klo --;
150 } else
151 while (dist > best) {
152 klo ++;
153 x = v[klo].x; y = x -(klo-1);
154 dist = abs((ahi-x)-(bhi-y));
157 x = v[khi].x+1; y = x - (khi+1);
158 dist = abs((ahi-x)-(bhi-y));
159 if (dist <= best) {
160 v[khi+1] = v[khi];
161 v[khi+1].x++;
162 khi ++;
163 } else
164 while (dist > best) {
165 khi --;
166 x = v[khi].x+1; y = x - (khi+1);
167 dist = abs((ahi-x)-(bhi-y));
172 static struct csl *lcsl(struct file *a, int alo, int ahi,
173 struct file *b, int blo, int bhi,
174 struct csl *csl,
175 struct v *v)
177 int len;
178 int alo1 = alo;
179 int ahi1 = ahi;
180 int blo1 = blo;
181 int bhi1 = bhi;
182 struct csl *rv = NULL;
183 int k;
185 if (ahi <= alo || bhi <= blo)
186 return csl;
189 k = find_common(a,b,
190 &alo1, &ahi1,
191 &blo1, &bhi1,
192 (ahi+bhi+alo+blo)/2,
194 if (k != ahi-bhi) abort();
196 len = v[k].l;
198 if (csl == NULL) {
199 rv = csl = malloc((len+1)*sizeof(*csl));
200 csl->len = 0;
202 if (len) {
203 csl = lcsl(a,alo,alo1,
204 b,blo,blo1,
205 csl, v);
207 if (ahi1 > alo1) {
208 /* need to add this common seq, possibly attach
209 * to last
211 if (csl->len &&
212 csl->a+csl->len == alo1 &&
213 csl->b+csl->len == blo1) {
214 csl->len += ahi1-alo1;
215 } else {
216 if (csl->len) csl++;
217 csl->len = ahi1-alo1;
218 csl->a = alo1;
219 csl->b = blo1;
220 csl[1].len = 0;
223 csl = lcsl(a,ahi1,ahi,
224 b,bhi1,bhi,
225 csl,v);
227 if (rv) {
228 if (csl->len)
229 csl++;
230 csl->a = ahi;
231 csl->b = bhi;
232 #if 1
233 if (rv+len != csl || csl->len != 0)
234 abort(); /* number of runs was wrong */
235 #endif
236 return rv;
237 } else
238 return csl;
241 /* If two common sequences are separated by only an add or remove,
242 * and the first common ends the same as the middle text,
243 * extend the second and contract the first in the hope that the
244 * first might become empty. This ameliorates against the greedyness
245 * of the 'diff' algorithm.
246 * We treat the final zero-length 'csl' as a common sequence which
247 * can be extended so we much make sure to add a new zero-length csl
248 * to the end.
249 * Once this is done, repeat the process but extend the first
250 * in favour of the second but only up to the last newline. This
251 * acknowledges that semantic units more often end with common
252 * text ("return 0;\n}\n", "\n") than start with it.
254 static void fixup(struct file *a, struct file *b, struct csl *list)
256 struct csl *list1, *orig;
257 int lasteol = -1;
258 int found_end = 0;
259 if (!list) return;
260 orig = list;
261 list1 = list+1;
262 while (list->len) {
263 if (list1->len == 0)
264 found_end = 1;
266 if ((list->a+list->len == list1->a &&
267 list->b+list->len != list1->b &&
268 /* text at b inserted */
269 match(&b->list[list->b+list->len-1],
270 &b->list[list1->b-1])
273 (list->b+list->len == list1->b &&
274 list->a+list->len != list1->a &&
275 /* text at a deleted */
276 match(&a->list[list->a+list->len-1],
277 &a->list[list1->a-1])
280 #if 0
281 printword(stderr, a->list[list1->a-1]);
282 printf("fixup %d,%d %d : %d,%d %d\n",
283 list->a,list->b,list->len,
284 list1->a,list1->b,list1->len);
285 #endif
286 if (ends_line(a->list[list->a+list->len-1])
287 && a->list[list->a+list->len-1].len==1
288 && lasteol == -1
290 #if 0
291 printf("E\n");
292 #endif
293 lasteol = list1->a-1;
295 list1->a--;
296 list1->b--;
297 list1->len++;
298 list->len--;
299 if (list->len == 0) {
300 lasteol = -1;
301 if (found_end) {
302 *list = *list1;
303 list1->a += list1->len;
304 list1->b += list1->len;
305 list1->len = 0;
306 } else if (list > orig)
307 list--;
308 else {
309 *list = *list1++;
310 /* printf("C\n");*/
313 } else {
314 if (lasteol >= 0) {
315 /* printf("seek %d\n", lasteol);*/
316 while (list1->a <= lasteol && (list1->len>1 ||
317 (found_end && list1->len > 0))) {
318 list1->a++;
319 list1->b++;
320 list1->len--;
321 list->len++;
323 lasteol=-1;
325 *++list = *list1;
326 if (found_end) {
327 list1->a += list1->len;
328 list1->b += list1->len;
329 list1->len = 0;
330 } else
331 list1++;
333 if (list->len && list1 == list) abort();
335 // list[1] = list1[0];
338 struct csl *diff(struct file a, struct file b)
340 struct v *v;
341 struct csl *csl;
342 v = malloc(sizeof(struct v)*(a.elcnt+b.elcnt+2));
343 v += b.elcnt+1;
345 csl = lcsl(&a, 0, a.elcnt,
346 &b, 0, b.elcnt,
347 NULL, v);
348 free(v-(b.elcnt+1));
349 fixup(&a, &b, csl);
350 if (!csl) {
351 csl = malloc(sizeof(*csl));
352 csl->len = 0;
353 csl->a = a.elcnt;
354 csl->b = b.elcnt;
356 return csl;
359 struct csl *diff_partial(struct file a, struct file b,
360 int alo, int ahi, int blo, int bhi)
362 struct v *v;
363 struct csl *csl;
364 v = malloc(sizeof(struct v)*(ahi-alo+bhi-blo+2));
365 v += bhi-alo+1;
367 csl = lcsl(&a, alo, ahi,
368 &b, blo, bhi,
369 NULL, v);
370 free(v-(bhi-alo+1));
371 fixup(&a, &b, csl);
372 return csl;
376 #ifdef MAIN
378 main(int argc, char *argv[])
380 struct file a, b;
381 struct csl *csl;
382 struct elmnt *lst = malloc(argc*sizeof(*lst));
383 int arg;
384 int alo, ahi, blo, bhi;
385 struct v *v;
386 int ln;
388 arg = 1;
389 a.elcnt = 0;
390 a.list = lst;
391 while (argv[arg] && strcmp(argv[arg],"--")) {
392 lst->hash = 0;
393 lst->start = argv[arg];
394 lst->len = strlen(argv[arg]);
395 a.elcnt++;
396 lst++;
397 arg++;
399 if (!argv[arg]) {
400 printf("AARGH\n");
401 exit(1);
403 arg++;
404 b.elcnt = 0;
405 b.list = lst;
406 while (argv[arg] && strcmp(argv[arg],"--")) {
407 lst->hash = 0;
408 lst->start = argv[arg];
409 lst->len = strlen(argv[arg]);
410 b.elcnt++;
411 lst++;
412 arg++;
415 v = malloc(sizeof(struct v)*(a.elcnt+b.elcnt+2));
416 v += b.elcnt+1;
417 alo = blo = 0;
418 ahi = a.elcnt;
419 bhi = b.elcnt;
420 #if 0
421 ln = find_common(&a, &b,
422 &alo, &ahi, &blo, &bhi,
423 (ahi+bhi)/2,
426 printf("ln=%d (%d,%d) -> (%d,%d)\n", ln,
427 alo,blo,ahi,bhi);
428 #else
429 csl = lcsl(&a, 0, a.elcnt,
430 &b, 0, b.elcnt,
431 NULL, v);
432 fixup(&a, &b, csl);
433 while (csl && csl->len) {
434 int i;
435 printf("%d,%d for %d:\n", csl->a,csl->b,csl->len);
436 for (i=0; i<csl->len; i++) {
437 printf(" %.*s (%.*s)\n",
438 a.list[csl->a+i].len, a.list[csl->a+i].start,
439 b.list[csl->b+i].len, b.list[csl->b+i].start);
441 csl++;
443 #endif
445 exit(0);
448 #endif