2 * wiggle - apply rejected patches
4 * Copyright (C) 2003 Neil Brown <neilb@cse.unsw.edu.au>
7 * This program is free software; you can redistribute it and/or modify
8 * it under the terms of the GNU General Public License as published by
9 * the Free Software Foundation; either version 2 of the License, or
10 * (at your option) any later version.
12 * This program is distributed in the hope that it will be useful,
13 * but WITHOUT ANY WARRANTY; without even the implied warranty of
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 * GNU General Public License for more details.
17 * You should have received a copy of the GNU General Public License
18 * along with this program; if not, write to the Free Software
19 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
22 * Email: <neilb@cse.unsw.edu.au>
24 * School of Computer Science and Engineering
25 * The University of New South Wales
31 * calculate longest common sequence between two sequences
33 * Each sequence contains strings with
35 * We produce a list of tripples: a b len
36 * where A and B point to elements in the two sequences, and len is the number
37 * of common elements there
39 * To help keep matches close together (important when matching a changed fragment
40 * against a whole) we track the disagonal of the first and last match on any path.
41 * When choosing the best of two paths, we choose the furthest reaching unless
42 * the other has a match and it's absolute diagonal difference is significantly smaller.
43 * 'Significant' if the reduction in difference exceeds the loss of progress by a
55 int x
; /* x location of furthest reaching path of current cost */
56 int md
; /* diagonal location of midline crossing */
57 int l
; /* number of continuous common sequences found so far */
61 static int find_common(struct file
*a
, struct file
*b
,
67 /* examine matrix from alo to ahi and blo to bhi
68 * finding longest subsequence.
69 * return new {a,b}{lo,hi} either side of midline.
70 * i.e. alo+blo <= mid <= ahi+bhi
71 * and alo,blo to ahi,bhi is a common (possibly empty) subseq
73 * v is scratch space each is indexable from
74 * alo-bhi to ahi-blo inclusive
85 int best
= (ahi
-alo
)+(bhi
-blo
);
94 for (k
=klo
; k
<= khi
; k
+= 2) {
100 while (x
< ahi
&& y
< bhi
&&
101 match(&a
->list
[x
], &b
->list
[y
])
109 dist
= (ahi
-x
)+(bhi
-y
);
110 if (dist
< best
) best
= dist
;
112 v
[k
].x
+v
[k
].x
-k
<= mid
) {
118 /* OK! We have arrived.
119 * We crossed the midpoint at or after v[k].xm,v[k].ym
121 if (x
!= ahi
) abort();
127 while (x
< ahi
&& y
< bhi
&&
128 match(&a
->list
[x
], &b
->list
[y
])
141 for (k
=klo
+1; k
<= khi
-1 ; k
+= 2) {
142 if (v
[k
-1].x
+1 >= v
[k
+1].x
) {
150 x
= v
[klo
].x
; y
= x
-(klo
-1);
151 dist
= abs((ahi
-x
)-(bhi
-y
));
156 while (dist
> best
) {
158 x
= v
[klo
].x
; y
= x
-(klo
-1);
159 dist
= abs((ahi
-x
)-(bhi
-y
));
162 x
= v
[khi
].x
+1; y
= x
- (khi
+1);
163 dist
= abs((ahi
-x
)-(bhi
-y
));
169 while (dist
> best
) {
171 x
= v
[khi
].x
+1; y
= x
- (khi
+1);
172 dist
= abs((ahi
-x
)-(bhi
-y
));
177 static struct csl
*lcsl(struct file
*a
, int alo
, int ahi
,
178 struct file
*b
, int blo
, int bhi
,
187 struct csl
*rv
= NULL
;
190 if (ahi
<= alo
|| bhi
<= blo
)
199 if (k
!= ahi
-bhi
) abort();
204 rv
= csl
= malloc((len
+1)*sizeof(*csl
));
208 csl
= lcsl(a
,alo
,alo1
,
213 /* need to add this common seq, possibly attach
217 csl
->a
+csl
->len
== alo1
&&
218 csl
->b
+csl
->len
== blo1
) {
219 csl
->len
+= ahi1
-alo1
;
222 csl
->len
= ahi1
-alo1
;
228 csl
= lcsl(a
,ahi1
,ahi
,
238 if (rv
+len
!= csl
|| csl
->len
!= 0)
239 abort(); /* number of runs was wrong */
246 /* if two common sequences are separated by only an add or remove,
247 * and the first common ends the same as the middle text,
248 * extend the second and contract the first in the hope that the
249 * first might become empty. This ameliorates against the greedyness
250 * of the 'diff' algorithm.
251 * Once this is done, repeat the process but extend the first
252 * in favour of the second. The acknowledges that semantic units
253 * more often end with common text ("return 0;\n}\n", "\n") than
256 static void fixup(struct file
*a
, struct file
*b
, struct csl
*list
)
258 struct csl
*list1
, *orig
;
263 while (list
->len
&& list1
->len
) {
264 if ((list
->a
+list
->len
== list1
->a
&&
265 /* text at b inserted */
266 match(&b
->list
[list
->b
+list
->len
-1],
267 &b
->list
[list1
->b
-1])
270 (list
->b
+list
->len
== list1
->b
&&
271 /* text at a deleted */
272 match(&a
->list
[list
->a
+list
->len
-1],
273 &a
->list
[list1
->a
-1])
276 /* printword(a->list[list1->a-1]);
277 printf("fixup %d,%d %d : %d,%d %d\n",
278 list->a,list->b,list->len,
279 list1->a,list1->b,list1->len);
280 */ if (ends_line(a
->list
[list
->a
+list
->len
-1])
281 && a
->list
[list
->a
+list
->len
-1].len
==1
285 lasteol
= list1
->a
-1;
291 if (list
->len
== 0) {
302 /* printf("seek %d\n", lasteol);*/
303 while (list1
->a
<= lasteol
&& list1
->len
>1) {
317 struct csl
*diff(struct file a
, struct file b
)
321 v
= malloc(sizeof(struct v
)*(a
.elcnt
+b
.elcnt
+2));
324 csl
= lcsl(&a
, 0, a
.elcnt
,
330 csl
= malloc(sizeof(*csl
));
338 struct csl
*diff_partial(struct file a
, struct file b
,
339 int alo
, int ahi
, int blo
, int bhi
)
343 v
= malloc(sizeof(struct v
)*(ahi
-alo
+bhi
-blo
+2));
346 csl
= lcsl(&a
, alo
, ahi
,
357 main(int argc
, char *argv
[])
361 struct elmnt
*lst
= malloc(argc
*sizeof(*lst
));
363 int alo
, ahi
, blo
, bhi
;
370 while (argv
[arg
] && strcmp(argv
[arg
],"--")) {
372 lst
->start
= argv
[arg
];
373 lst
->len
= strlen(argv
[arg
]);
385 while (argv
[arg
] && strcmp(argv
[arg
],"--")) {
387 lst
->start
= argv
[arg
];
388 lst
->len
= strlen(argv
[arg
]);
394 v
= malloc(sizeof(struct v
)*(a
.elcnt
+b
.elcnt
+2));
400 ln
= find_common(&a
, &b
,
401 &alo
, &ahi
, &blo
, &bhi
,
405 printf("ln=%d (%d,%d) -> (%d,%d)\n", ln
,
408 csl
= lcsl(&a
, 0, a
.elcnt
,
412 while (csl
&& csl
->len
) {
414 printf("%d,%d for %d:\n", csl
->a
,csl
->b
,csl
->len
);
415 for (i
=0; i
<csl
->len
; i
++) {
416 printf(" %.*s (%.*s)\n",
417 a
.list
[csl
->a
+i
].len
, a
.list
[csl
->a
+i
].start
,
418 b
.list
[csl
->b
+i
].len
, b
.list
[csl
->b
+i
].start
);