2 * wiggle - apply rejected patches
4 * Copyright (C) 2003 Neil Brown <neilb@cse.unsw.edu.au>
7 * This program is free software; you can redistribute it and/or modify
8 * it under the terms of the GNU General Public License as published by
9 * the Free Software Foundation; either version 2 of the License, or
10 * (at your option) any later version.
12 * This program is distributed in the hope that it will be useful,
13 * but WITHOUT ANY WARRANTY; without even the implied warranty of
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 * GNU General Public License for more details.
17 * You should have received a copy of the GNU General Public License
18 * along with this program; if not, write to the Free Software
19 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
22 * Email: <neilb@suse.de>
26 * calculate longest common sequence between two sequences
28 * Each sequence contains strings with
30 * We produce a list of tripples: a b len
31 * where A and B point to elements in the two sequences, and len is the number
32 * of common elements there
34 * To help keep matches close together (important when matching a changed fragment
35 * against a whole) we track the disagonal of the first and last match on any path.
36 * When choosing the best of two paths, we choose the furthest reaching unless
37 * the other has a match and it's absolute diagonal difference is significantly smaller.
38 * 'Significant' if the reduction in difference exceeds the loss of progress by a
50 int x
; /* x location of furthest reaching path of current cost */
51 int md
; /* diagonal location of midline crossing */
52 int l
; /* number of continuous common sequences found so far */
56 static int find_common(struct file
*a
, struct file
*b
,
62 /* examine matrix from alo to ahi and blo to bhi
63 * finding longest subsequence.
64 * return new {a,b}{lo,hi} either side of midline.
65 * i.e. alo+blo <= mid <= ahi+bhi
66 * and alo,blo to ahi,bhi is a common (possibly empty) subseq
68 * v is scratch space each is indexable from
69 * alo-bhi to ahi-blo inclusive
80 int best
= (ahi
-alo
)+(bhi
-blo
);
89 for (k
=klo
; k
<= khi
; k
+= 2) {
95 while (x
< ahi
&& y
< bhi
&&
96 match(&a
->list
[x
], &b
->list
[y
])
104 dist
= (ahi
-x
)+(bhi
-y
);
105 if (dist
< best
) best
= dist
;
107 v
[k
].x
+v
[k
].x
-k
<= mid
) {
113 /* OK! We have arrived.
114 * We crossed the midpoint at or after v[k].xm,v[k].ym
116 if (x
!= ahi
) abort();
122 while (x
< ahi
&& y
< bhi
&&
123 match(&a
->list
[x
], &b
->list
[y
])
136 for (k
=klo
+1; k
<= khi
-1 ; k
+= 2) {
137 if (v
[k
-1].x
+1 >= v
[k
+1].x
) {
145 x
= v
[klo
].x
; y
= x
-(klo
-1);
146 dist
= abs((ahi
-x
)-(bhi
-y
));
151 while (dist
> best
) {
153 x
= v
[klo
].x
; y
= x
-(klo
-1);
154 dist
= abs((ahi
-x
)-(bhi
-y
));
157 x
= v
[khi
].x
+1; y
= x
- (khi
+1);
158 dist
= abs((ahi
-x
)-(bhi
-y
));
164 while (dist
> best
) {
166 x
= v
[khi
].x
+1; y
= x
- (khi
+1);
167 dist
= abs((ahi
-x
)-(bhi
-y
));
172 static struct csl
*lcsl(struct file
*a
, int alo
, int ahi
,
173 struct file
*b
, int blo
, int bhi
,
182 struct csl
*rv
= NULL
;
185 if (ahi
<= alo
|| bhi
<= blo
)
194 if (k
!= ahi
-bhi
) abort();
199 rv
= csl
= malloc((len
+1)*sizeof(*csl
));
203 csl
= lcsl(a
,alo
,alo1
,
208 /* need to add this common seq, possibly attach
212 csl
->a
+csl
->len
== alo1
&&
213 csl
->b
+csl
->len
== blo1
) {
214 csl
->len
+= ahi1
-alo1
;
217 csl
->len
= ahi1
-alo1
;
223 csl
= lcsl(a
,ahi1
,ahi
,
233 if (rv
+len
!= csl
|| csl
->len
!= 0)
234 abort(); /* number of runs was wrong */
241 /* If two common sequences are separated by only an add or remove,
242 * and the first common ends the same as the middle text,
243 * extend the second and contract the first in the hope that the
244 * first might become empty. This ameliorates against the greedyness
245 * of the 'diff' algorithm.
246 * We treat the final zero-length 'csl' as a common sequence which
247 * can be extended so we much make sure to add a new zero-length csl
249 * Once this is done, repeat the process but extend the first
250 * in favour of the second but only up to the last newline. This
251 * acknowledges that semantic units more often end with common
252 * text ("return 0;\n}\n", "\n") than start with it.
254 static void fixup(struct file
*a
, struct file
*b
, struct csl
*list
)
256 struct csl
*list1
, *orig
;
266 if ((list
->a
+list
->len
== list1
->a
&&
267 list
->b
+list
->len
!= list1
->b
&&
268 /* text at b inserted */
269 match(&b
->list
[list
->b
+list
->len
-1],
270 &b
->list
[list1
->b
-1])
273 (list
->b
+list
->len
== list1
->b
&&
274 list
->a
+list
->len
!= list1
->a
&&
275 /* text at a deleted */
276 match(&a
->list
[list
->a
+list
->len
-1],
277 &a
->list
[list1
->a
-1])
281 printword(stderr
, a
->list
[list1
->a
-1]);
282 printf("fixup %d,%d %d : %d,%d %d\n",
283 list
->a
,list
->b
,list
->len
,
284 list1
->a
,list1
->b
,list1
->len
);
286 if (ends_line(a
->list
[list
->a
+list
->len
-1])
287 && a
->list
[list
->a
+list
->len
-1].len
==1
293 lasteol
= list1
->a
-1;
299 if (list
->len
== 0) {
303 list1
->a
+= list1
->len
;
304 list1
->b
+= list1
->len
;
306 } else if (list
> orig
)
315 /* printf("seek %d\n", lasteol);*/
316 while (list1
->a
<= lasteol
&& (list1
->len
>1 ||
317 (found_end
&& list1
->len
> 0))) {
327 list1
->a
+= list1
->len
;
328 list1
->b
+= list1
->len
;
333 if (list
->len
&& list1
== list
) abort();
335 // list[1] = list1[0];
338 struct csl
*diff(struct file a
, struct file b
)
342 v
= malloc(sizeof(struct v
)*(a
.elcnt
+b
.elcnt
+2));
345 csl
= lcsl(&a
, 0, a
.elcnt
,
351 csl
= malloc(sizeof(*csl
));
359 struct csl
*diff_partial(struct file a
, struct file b
,
360 int alo
, int ahi
, int blo
, int bhi
)
364 v
= malloc(sizeof(struct v
)*(ahi
-alo
+bhi
-blo
+2));
367 csl
= lcsl(&a
, alo
, ahi
,
378 main(int argc
, char *argv
[])
382 struct elmnt
*lst
= malloc(argc
*sizeof(*lst
));
384 int alo
, ahi
, blo
, bhi
;
391 while (argv
[arg
] && strcmp(argv
[arg
],"--")) {
393 lst
->start
= argv
[arg
];
394 lst
->len
= strlen(argv
[arg
]);
406 while (argv
[arg
] && strcmp(argv
[arg
],"--")) {
408 lst
->start
= argv
[arg
];
409 lst
->len
= strlen(argv
[arg
]);
415 v
= malloc(sizeof(struct v
)*(a
.elcnt
+b
.elcnt
+2));
421 ln
= find_common(&a
, &b
,
422 &alo
, &ahi
, &blo
, &bhi
,
426 printf("ln=%d (%d,%d) -> (%d,%d)\n", ln
,
429 csl
= lcsl(&a
, 0, a
.elcnt
,
433 while (csl
&& csl
->len
) {
435 printf("%d,%d for %d:\n", csl
->a
,csl
->b
,csl
->len
);
436 for (i
=0; i
<csl
->len
; i
++) {
437 printf(" %.*s (%.*s)\n",
438 a
.list
[csl
->a
+i
].len
, a
.list
[csl
->a
+i
].start
,
439 b
.list
[csl
->b
+i
].len
, b
.list
[csl
->b
+i
].start
);