openat: don’t close (-1)
[gnulib.git] / lib / git-merge-changelog.c
blob60754c83671633d3dee3d54db99fc7ecfe73553b
1 /* git-merge-changelog - git "merge" driver for GNU style ChangeLog files.
2 Copyright (C) 2008-2024 Bruno Haible <bruno@clisp.org>
4 This file is free software: you can redistribute it and/or modify
5 it under the terms of the GNU General Public License as published
6 by the Free Software Foundation, either version 3 of the License,
7 or (at your option) any later version.
9 This file is distributed in the hope that it will be useful,
10 but WITHOUT ANY WARRANTY; without even the implied warranty of
11 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 GNU General Public License for more details.
14 You should have received a copy of the GNU General Public License
15 along with this program. If not, see <https://www.gnu.org/licenses/>. */
17 /* README:
18 The default merge driver of 'git' *always* produces conflicts when
19 pulling public modifications into a privately modified ChangeLog file.
20 This is because ChangeLog files are always modified at the top; the
21 default merge driver has no clue how to deal with this. Furthermore
22 the conflicts are presented with more <<<< ==== >>>> markers than
23 necessary; this is because the default merge driver makes pointless
24 efforts to look at the individual line changes inside a ChangeLog entry.
26 This program serves as a 'git' merge driver that avoids these problems.
27 1. It produces no conflict when ChangeLog entries have been inserted
28 at the top both in the public and in the private modification. It
29 puts the privately added entries above the publicly added entries.
30 2. It respects the structure of ChangeLog files: entries are not split
31 into lines but kept together.
32 3. It also handles the case of small modifications of past ChangeLog
33 entries, or of removed ChangeLog entries: they are merged as one
34 would expect it.
35 4. Conflicts are presented at the top of the file, rather than where
36 they occurred, so that the user will see them immediately. (Unlike
37 for source code written in some programming language, conflict markers
38 that are located several hundreds lines from the top will not cause
39 any syntax error and therefore would be likely to remain unnoticed.)
42 /* Installation:
44 $ ./gnulib-tool --create-testdir --dir=/tmp/testdir123 git-merge-changelog
45 $ cd /tmp/testdir123
46 $ ./configure
47 $ make
48 $ make install
50 Additionally, for git users:
51 - Add to .git/config of the checkout (or to your $HOME/.gitconfig) the
52 lines
54 [merge "merge-changelog"]
55 name = GNU-style ChangeLog merge driver
56 driver = /usr/local/bin/git-merge-changelog %O %A %B
58 - Add to the top-level directory of the checkout a file '.gitattributes'
59 with this line:
61 ChangeLog merge=merge-changelog
63 (See "man 5 gitattributes" for more info.)
65 Additionally, for bzr users:
66 - Install the 'extmerge' bzr plug-in listed at
67 <http://doc.bazaar.canonical.com/plugins/en/index.html>
68 <http://wiki.bazaar.canonical.com/BzrPlugins>
69 - Add to your $HOME/.bazaar/bazaar.conf the line
71 external_merge = git-merge-changelog %b %T %o
73 - Then, to merge a conflict in a ChangeLog file, use
75 $ bzr extmerge ChangeLog
77 Additionally, for hg users:
78 - Add to your $HOME/.hgrc the lines
80 [merge-patterns]
81 ChangeLog = git-merge-changelog
83 [merge-tools]
84 git-merge-changelog.executable = /usr/local/bin/git-merge-changelog
85 git-merge-changelog.args = $base $local $other
87 See <https://www.selenic.com/mercurial/hgrc.5.html> section merge-tools
88 for reference.
91 /* Use as an alternative to 'diff3':
92 git-merge-changelog performs the same role as "diff3 -m", just with
93 reordered arguments:
94 $ git-merge-changelog %O %A %B
95 is comparable to
96 $ diff3 -m %A %O %B
99 /* Calling convention:
100 A merge driver is called with three filename arguments:
101 1. %O = The common ancestor of %A and %B.
102 2. %A = The file's contents from the "current branch".
103 3. %B = The file's contents from the "other branch"; this is the contents
104 being merged in.
106 In case of a "git stash apply" or of an upstream pull (e.g. from a subsystem
107 maintainer to a central maintainer) or of a downstream pull with --rebase:
108 2. %A = The file's newest pulled contents; modified by other committers.
109 3. %B = The user's newest copy of the file; modified by the user.
110 In case of a downstream pull (e.g. from a central repository to the user)
111 or of an upstream pull with --rebase:
112 2. %A = The user's newest copy of the file; modified by the user.
113 3. %B = The file's newest pulled contents; modified by other committers.
115 It should write its merged output into file %A. It can also echo some
116 remarks to stdout. It should exit with return code 0 if the merge could
117 be resolved cleanly, or with non-zero return code if there were conflicts.
120 /* How it works:
121 The structure of a ChangeLog file: It consists of ChangeLog entries. A
122 ChangeLog entry starts at a line following a blank line and that starts with
123 a non-whitespace character, or at the beginning of a file.
124 The merge driver works as follows: It reads the three files into memory and
125 dissects them into ChangeLog entries. It then finds the differences between
126 %O and %B. They are classified as:
127 - removals (some consecutive entries removed),
128 - changes (some consecutive entries removed, some consecutive entries
129 added),
130 - additions (some consecutive entries added).
131 The driver then attempts to apply the changes to %A.
132 To this effect, it first computes a correspondence between the entries in %O
133 and the entries in %A, using fuzzy string matching to still identify changed
134 entries.
135 - Removals are applied one by one. If the entry is present in %A, at any
136 position, it is removed. If not, the removal is marked as a conflict.
137 - Additions at the top of %B are applied at the top of %A.
138 - Additions between entry x and entry y (y may be the file end) in %B are
139 applied between entry x and entry y in %A (if they still exist and are
140 still consecutive in %A), otherwise the additions are marked as a
141 conflict.
142 - Changes are categorized into "simple changes":
143 entry1 ... entryn
144 are mapped to
145 added_entry ... added_entry modified_entry1 ... modified_entryn,
146 where the correspondence between entry_i and modified_entry_i is still
147 clear; and "big changes": these are all the rest. Simple changes at the
148 top of %B are applied by putting the added entries at the top of %A. The
149 changes in simple changes are applied one by one; possibly leading to
150 single-entry conflicts. Big changes are applied en bloc, possibly
151 leading to conflicts spanning multiple entries.
152 - Conflicts are output at the top of the file and cause an exit status of
156 #include <config.h>
158 #include <getopt.h>
159 #include <limits.h>
160 #include <stdint.h>
161 #include <stdio.h>
162 #include <stdlib.h>
163 #include <string.h>
164 #include <sys/types.h>
165 #include <unistd.h>
167 #include <error.h>
168 #include "idx.h"
169 #include "read-file.h"
170 #include "gl_xlist.h"
171 #include "gl_array_list.h"
172 #include "gl_linkedhash_list.h"
173 #include "gl_rbtreehash_list.h"
174 #include "gl_linked_list.h"
175 #include "xalloc.h"
176 #include "xmalloca.h"
177 #include "fstrcmp.h"
178 #include "minmax.h"
179 #include "c-strstr.h"
180 #include "fwriteerror.h"
182 #define ASSERT(expr) \
183 do \
185 if (!(expr)) \
186 abort (); \
188 while (0)
190 #define FSTRCMP_THRESHOLD 0.6
191 #define FSTRCMP_STRICTER_THRESHOLD 0.8
193 /* Representation of a ChangeLog entry.
194 The string may contain NUL bytes; therefore it is represented as a plain
195 opaque memory region. */
196 struct entry
198 char *string;
199 idx_t length;
200 /* Cache for the hash code. */
201 bool hashcode_cached;
202 size_t hashcode;
205 /* Create an entry.
206 The memory region passed by the caller must of indefinite extent. It is
207 *not* copied here. */
208 static struct entry *
209 entry_create (char *string, idx_t length)
211 struct entry *result = XMALLOC (struct entry);
212 result->string = string;
213 result->length = length;
214 result->hashcode_cached = false;
215 return result;
218 /* Compare two entries for equality. */
219 static bool
220 entry_equals (const void *elt1, const void *elt2)
222 const struct entry *entry1 = (const struct entry *) elt1;
223 const struct entry *entry2 = (const struct entry *) elt2;
224 return entry1->length == entry2->length
225 && memcmp (entry1->string, entry2->string, entry1->length) == 0;
228 /* Return a hash code of the contents of a ChangeLog entry. */
229 static size_t
230 entry_hashcode (const void *elt)
232 struct entry *entry = (struct entry *) elt;
233 if (!entry->hashcode_cached)
235 /* See https://www.haible.de/bruno/hashfunc.html. */
236 const char *s;
237 idx_t n;
238 size_t h = 0;
240 for (s = entry->string, n = entry->length; n > 0; s++, n--)
241 h = (unsigned char) *s + ((h << 9) | (h >> (SIZE_WIDTH - 9)));
243 entry->hashcode = h;
244 entry->hashcode_cached = true;
246 return entry->hashcode;
249 /* Perform a fuzzy comparison of two ChangeLog entries.
250 Return a similarity measure of the two entries, a value between 0 and 1.
251 0 stands for very distinct, 1 for identical.
252 If the result is < LOWER_BOUND, an arbitrary other value < LOWER_BOUND can
253 be returned. */
254 static double
255 entry_fstrcmp (const struct entry *entry1, const struct entry *entry2,
256 double lower_bound)
258 /* fstrcmp works only on NUL terminated strings. */
259 char *memory;
260 double similarity;
262 if (memchr (entry1->string, '\0', entry1->length) != NULL)
263 return 0.0;
264 if (memchr (entry2->string, '\0', entry2->length) != NULL)
265 return 0.0;
266 memory = (char *) xmalloca (entry1->length + 1 + entry2->length + 1);
268 char *p = memory;
269 memcpy (p, entry1->string, entry1->length);
270 p += entry1->length;
271 *p++ = '\0';
272 memcpy (p, entry2->string, entry2->length);
273 p += entry2->length;
274 *p++ = '\0';
276 similarity =
277 fstrcmp_bounded (memory, memory + entry1->length + 1, lower_bound);
278 freea (memory);
279 return similarity;
282 /* This structure represents an entire ChangeLog file, after it was read
283 into memory. */
284 struct changelog_file
286 /* The entries, as a list. */
287 gl_list_t /* <struct entry *> */ entries_list;
288 /* The entries, as a list in opposite direction. */
289 gl_list_t /* <struct entry *> */ entries_reversed;
290 /* The entries, as an array. */
291 idx_t num_entries;
292 struct entry **entries;
295 /* Read a ChangeLog file into memory.
296 Return the contents in *RESULT. */
297 static void
298 read_changelog_file (const char *filename, struct changelog_file *result)
300 /* Read the file in text mode, otherwise it's hard to recognize empty
301 lines. */
302 size_t length;
303 char *contents = read_file (filename, 0, &length);
304 if (contents == NULL)
306 fprintf (stderr, "could not read file '%s'\n", filename);
307 exit (EXIT_FAILURE);
310 result->entries_list =
311 gl_list_create_empty (GL_LINKEDHASH_LIST, entry_equals, entry_hashcode,
312 NULL, true);
313 result->entries_reversed =
314 gl_list_create_empty (GL_RBTREEHASH_LIST, entry_equals, entry_hashcode,
315 NULL, true);
316 /* A ChangeLog file consists of ChangeLog entries. A ChangeLog entry starts
317 at a line following a blank line and that starts with a non-whitespace
318 character, or at the beginning of a file.
319 Split the file contents into entries. */
321 char *contents_end = contents + length;
322 char *start = contents;
323 while (start < contents_end)
325 /* Search the end of the current entry. */
326 char *ptr = start;
327 struct entry *curr;
329 while (ptr < contents_end)
331 ptr = memchr (ptr, '\n', contents_end - ptr);
332 if (ptr == NULL)
334 ptr = contents_end;
335 break;
337 ptr++;
338 if (contents_end - ptr >= 2
339 && ptr[0] == '\n'
340 && !(ptr[1] == '\n' || ptr[1] == '\t' || ptr[1] == ' '))
342 ptr++;
343 break;
347 curr = entry_create (start, ptr - start);
348 gl_list_add_last (result->entries_list, curr);
349 gl_list_add_first (result->entries_reversed, curr);
351 start = ptr;
355 result->num_entries = gl_list_size (result->entries_list);
356 result->entries = XNMALLOC (result->num_entries, struct entry *);
358 idx_t index = 0;
359 gl_list_iterator_t iter = gl_list_iterator (result->entries_list);
360 const void *elt;
361 gl_list_node_t node;
362 while (gl_list_iterator_next (&iter, &elt, &node))
363 result->entries[index++] = (struct entry *) elt;
364 gl_list_iterator_free (&iter);
365 ASSERT (index == result->num_entries);
369 /* A mapping (correspondence) between entries of FILE1 and of FILE2. */
370 struct entries_mapping
372 struct changelog_file *file1;
373 struct changelog_file *file2;
374 /* Mapping from indices in FILE1 to indices in FILE2.
375 A value -1 means that the entry from FILE1 is not found in FILE2.
376 A value -2 means that it has not yet been computed. */
377 ptrdiff_t *index_mapping;
378 /* Mapping from indices in FILE2 to indices in FILE1.
379 A value -1 means that the entry from FILE2 is not found in FILE1.
380 A value -2 means that it has not yet been computed. */
381 ptrdiff_t *index_mapping_reverse;
384 /* Look up (or lazily compute) the mapping of an entry in FILE1.
385 i is the index in FILE1.
386 Return the index in FILE2, or -1 when the entry is not found in FILE2. */
387 static ptrdiff_t
388 entries_mapping_get (struct entries_mapping *mapping, idx_t i)
390 if (mapping->index_mapping[i] < -1)
392 struct changelog_file *file1 = mapping->file1;
393 struct changelog_file *file2 = mapping->file2;
394 idx_t n1 = file1->num_entries;
395 idx_t n2 = file2->num_entries;
396 struct entry *entry_i = file1->entries[i];
397 idx_t j;
399 /* Search whether it approximately occurs in file2. */
400 ptrdiff_t best_j = -1;
401 double best_j_similarity = 0.0;
402 for (j = n2; j > 0; )
404 j--;
405 if (mapping->index_mapping_reverse[j] < 0)
407 double similarity =
408 entry_fstrcmp (entry_i, file2->entries[j], best_j_similarity);
409 if (similarity > best_j_similarity)
411 best_j = j;
412 best_j_similarity = similarity;
416 if (best_j_similarity >= FSTRCMP_THRESHOLD)
418 /* Found a similar entry in file2. */
419 struct entry *entry_j = file2->entries[best_j];
420 /* Search whether it approximately occurs in file1 at index i. */
421 ptrdiff_t best_i = -1;
422 double best_i_similarity = 0.0;
423 idx_t ii;
424 for (ii = n1; ii > 0; )
426 ii--;
427 if (mapping->index_mapping[ii] < 0)
429 double similarity =
430 entry_fstrcmp (file1->entries[ii], entry_j,
431 best_i_similarity);
432 if (similarity > best_i_similarity)
434 best_i = ii;
435 best_i_similarity = similarity;
439 if (best_i_similarity >= FSTRCMP_THRESHOLD && best_i == i)
441 mapping->index_mapping[i] = best_j;
442 mapping->index_mapping_reverse[best_j] = i;
445 if (mapping->index_mapping[i] < -1)
446 /* It does not approximately occur in FILE2.
447 Remember it, for next time. */
448 mapping->index_mapping[i] = -1;
450 return mapping->index_mapping[i];
453 /* Look up (or lazily compute) the mapping of an entry in FILE2.
454 j is the index in FILE2.
455 Return the index in FILE1, or -1 when the entry is not found in FILE1. */
456 static ptrdiff_t
457 entries_mapping_reverse_get (struct entries_mapping *mapping, idx_t j)
459 if (mapping->index_mapping_reverse[j] < -1)
461 struct changelog_file *file1 = mapping->file1;
462 struct changelog_file *file2 = mapping->file2;
463 idx_t n1 = file1->num_entries;
464 idx_t n2 = file2->num_entries;
465 struct entry *entry_j = file2->entries[j];
466 idx_t i;
468 /* Search whether it approximately occurs in file1. */
469 ptrdiff_t best_i = -1;
470 double best_i_similarity = 0.0;
471 for (i = n1; i > 0; )
473 i--;
474 if (mapping->index_mapping[i] < 0)
476 double similarity =
477 entry_fstrcmp (file1->entries[i], entry_j, best_i_similarity);
478 if (similarity > best_i_similarity)
480 best_i = i;
481 best_i_similarity = similarity;
485 if (best_i_similarity >= FSTRCMP_THRESHOLD)
487 /* Found a similar entry in file1. */
488 struct entry *entry_i = file1->entries[best_i];
489 /* Search whether it approximately occurs in file2 at index j. */
490 ptrdiff_t best_j = -1;
491 double best_j_similarity = 0.0;
492 idx_t jj;
493 for (jj = n2; jj > 0; )
495 jj--;
496 if (mapping->index_mapping_reverse[jj] < 0)
498 double similarity =
499 entry_fstrcmp (entry_i, file2->entries[jj],
500 best_j_similarity);
501 if (similarity > best_j_similarity)
503 best_j = jj;
504 best_j_similarity = similarity;
508 if (best_j_similarity >= FSTRCMP_THRESHOLD && best_j == j)
510 mapping->index_mapping_reverse[j] = best_i;
511 mapping->index_mapping[best_i] = j;
514 if (mapping->index_mapping_reverse[j] < -1)
515 /* It does not approximately occur in FILE1.
516 Remember it, for next time. */
517 mapping->index_mapping_reverse[j] = -1;
519 return mapping->index_mapping_reverse[j];
522 /* Compute a mapping (correspondence) between entries of FILE1 and of FILE2.
523 The correspondence also takes into account small modifications; i.e. the
524 indicated relation is not equality of entries but best-match similarity
525 of entries.
526 If FULL is true, the maximum of matching is done up-front. If it is false,
527 it is done in a lazy way through the functions entries_mapping_get and
528 entries_mapping_reverse_get.
529 Return the result in *RESULT. */
530 static void
531 compute_mapping (struct changelog_file *file1, struct changelog_file *file2,
532 bool full,
533 struct entries_mapping *result)
535 /* Mapping from indices in file1 to indices in file2. */
536 ptrdiff_t *index_mapping;
537 /* Mapping from indices in file2 to indices in file1. */
538 ptrdiff_t *index_mapping_reverse;
539 idx_t n1 = file1->num_entries;
540 idx_t n2 = file2->num_entries;
541 idx_t i, j;
543 index_mapping = XNMALLOC (n1, ptrdiff_t);
544 for (i = 0; i < n1; i++)
545 index_mapping[i] = -2;
547 index_mapping_reverse = XNMALLOC (n2, ptrdiff_t);
548 for (j = 0; j < n2; j++)
549 index_mapping_reverse[j] = -2;
551 for (i = n1; i > 0; )
553 i--;
554 /* Take an entry from file1. */
555 if (index_mapping[i] < -1)
557 struct entry *entry = file1->entries[i];
558 /* Search whether it occurs in file2. */
559 size_t jrev = gl_list_indexof (file2->entries_reversed, entry);
560 if (jrev != (size_t)(-1))
562 j = n2 - 1 - jrev;
563 /* Found an exact correspondence. */
564 /* If index_mapping_reverse[j] >= 0, we have already seen other
565 copies of this entry, and there were more occurrences of it in
566 file1 than in file2. In this case, do nothing. */
567 if (index_mapping_reverse[j] < 0)
569 index_mapping[i] = j;
570 index_mapping_reverse[j] = i;
571 /* Look for more occurrences of the same entry. Match them
572 as long as they pair up. Unpaired occurrences of the same
573 entry are left without mapping. */
575 idx_t curr_i = i;
576 idx_t curr_j = j;
578 for (;;)
580 size_t next_i =
581 gl_list_indexof_from (file1->entries_reversed,
582 n1 - curr_i, entry);
583 if (next_i == (size_t)(-1))
584 break;
585 size_t next_j =
586 gl_list_indexof_from (file2->entries_reversed,
587 n2 - curr_j, entry);
588 if (next_j == (size_t)(-1))
589 break;
590 curr_i = n1 - 1 - next_i;
591 curr_j = n2 - 1 - next_j;
592 ASSERT (index_mapping[curr_i] < 0);
593 ASSERT (index_mapping_reverse[curr_j] < 0);
594 index_mapping[curr_i] = curr_j;
595 index_mapping_reverse[curr_j] = curr_i;
603 result->file1 = file1;
604 result->file2 = file2;
605 result->index_mapping = index_mapping;
606 result->index_mapping_reverse = index_mapping_reverse;
608 if (full)
609 for (i = n1; i > 0; )
611 i--;
612 entries_mapping_get (result, i);
616 /* An "edit" is a textual modification performed by the user, that needs to
617 be applied to the other file. */
618 enum edit_type
620 /* Some consecutive entries were added. */
621 ADDITION,
622 /* Some consecutive entries were removed; some other consecutive entries
623 were added at the same position. (Not necessarily the same number of
624 entries.) */
625 CHANGE,
626 /* Some consecutive entries were removed. */
627 REMOVAL
630 /* This structure represents an edit. */
631 struct edit
633 enum edit_type type;
634 /* Range of indices into the entries of FILE1. */
635 idx_t i1, i2; /* first, last index; only used for CHANGE, REMOVAL */
636 /* Range of indices into the entries of FILE2. */
637 idx_t j1, j2; /* first, last index; only used for ADDITION, CHANGE */
640 /* This structure represents the differences from one file, FILE1, to another
641 file, FILE2. */
642 struct differences
644 /* An array mapping FILE1 indices to FILE2 indices (or -1 when the entry
645 from FILE1 is not found in FILE2). */
646 ptrdiff_t *index_mapping;
647 /* An array mapping FILE2 indices to FILE1 indices (or -1 when the entry
648 from FILE2 is not found in FILE1). */
649 ptrdiff_t *index_mapping_reverse;
650 /* The edits that transform FILE1 into FILE2. */
651 idx_t num_edits;
652 struct edit **edits;
655 /* Import the difference detection algorithm from GNU diff. */
656 #define ELEMENT struct entry *
657 #define EQUAL entry_equals
658 #define OFFSET ptrdiff_t
659 #define OFFSET_MAX PTRDIFF_MAX
660 #define EXTRA_CONTEXT_FIELDS \
661 ptrdiff_t *index_mapping; \
662 ptrdiff_t *index_mapping_reverse;
663 #define NOTE_DELETE(ctxt, xoff) \
664 ctxt->index_mapping[xoff] = -1
665 #define NOTE_INSERT(ctxt, yoff) \
666 ctxt->index_mapping_reverse[yoff] = -1
667 #include "diffseq.h"
669 /* Compute the differences between the entries of FILE1 and the entries of
670 FILE2. */
671 static void
672 compute_differences (struct changelog_file *file1, struct changelog_file *file2,
673 struct differences *result)
675 /* Unlike compute_mapping, which mostly ignores the order of the entries and
676 therefore works well when some entries are permuted, here we use the order.
677 I think this is needed in order to distinguish changes from
678 additions+removals; I don't know how to say what is a "change" if the
679 files are considered as unordered sets of entries. */
680 struct context ctxt;
681 idx_t n1 = file1->num_entries;
682 idx_t n2 = file2->num_entries;
683 idx_t i;
684 idx_t j;
685 gl_list_t /* <struct edit *> */ edits;
687 ctxt.xvec = file1->entries;
688 ctxt.yvec = file2->entries;
689 ctxt.index_mapping = XNMALLOC (n1, ptrdiff_t);
690 for (i = 0; i < n1; i++)
691 ctxt.index_mapping[i] = 0;
692 ctxt.index_mapping_reverse = XNMALLOC (n2, ptrdiff_t);
693 for (j = 0; j < n2; j++)
694 ctxt.index_mapping_reverse[j] = 0;
695 ctxt.fdiag = XNMALLOC (2 * (n1 + n2 + 3), ptrdiff_t) + n2 + 1;
696 ctxt.bdiag = ctxt.fdiag + n1 + n2 + 3;
697 ctxt.too_expensive = n1 + n2;
699 /* Store in ctxt.index_mapping and ctxt.index_mapping_reverse a -1 for
700 each removed or added entry. */
701 compareseq (0, n1, 0, n2, 0, &ctxt);
703 /* Complete the index_mapping and index_mapping_reverse arrays. */
704 i = 0;
705 j = 0;
706 while (i < n1 || j < n2)
708 while (i < n1 && ctxt.index_mapping[i] < 0)
709 i++;
710 while (j < n2 && ctxt.index_mapping_reverse[j] < 0)
711 j++;
712 ASSERT ((i < n1) == (j < n2));
713 if (i == n1 && j == n2)
714 break;
715 ctxt.index_mapping[i] = j;
716 ctxt.index_mapping_reverse[j] = i;
717 i++;
718 j++;
721 /* Create the edits. */
722 edits = gl_list_create_empty (GL_ARRAY_LIST, NULL, NULL, NULL, true);
723 i = 0;
724 j = 0;
725 while (i < n1 || j < n2)
727 if (i == n1)
729 struct edit *e;
730 ASSERT (j < n2);
731 e = XMALLOC (struct edit);
732 e->type = ADDITION;
733 e->j1 = j;
734 e->j2 = n2 - 1;
735 gl_list_add_last (edits, e);
736 break;
738 if (j == n2)
740 struct edit *e;
741 ASSERT (i < n1);
742 e = XMALLOC (struct edit);
743 e->type = REMOVAL;
744 e->i1 = i;
745 e->i2 = n1 - 1;
746 gl_list_add_last (edits, e);
747 break;
749 if (ctxt.index_mapping[i] >= 0)
751 if (ctxt.index_mapping_reverse[j] >= 0)
753 ASSERT (ctxt.index_mapping[i] == j);
754 ASSERT (ctxt.index_mapping_reverse[j] == i);
755 i++;
756 j++;
758 else
760 struct edit *e;
761 ASSERT (ctxt.index_mapping_reverse[j] < 0);
762 e = XMALLOC (struct edit);
763 e->type = ADDITION;
764 e->j1 = j;
766 j++;
767 while (j < n2 && ctxt.index_mapping_reverse[j] < 0);
768 e->j2 = j - 1;
769 gl_list_add_last (edits, e);
772 else
774 if (ctxt.index_mapping_reverse[j] >= 0)
776 struct edit *e;
777 ASSERT (ctxt.index_mapping[i] < 0);
778 e = XMALLOC (struct edit);
779 e->type = REMOVAL;
780 e->i1 = i;
782 i++;
783 while (i < n1 && ctxt.index_mapping[i] < 0);
784 e->i2 = i - 1;
785 gl_list_add_last (edits, e);
787 else
789 struct edit *e;
790 ASSERT (ctxt.index_mapping[i] < 0);
791 ASSERT (ctxt.index_mapping_reverse[j] < 0);
792 e = XMALLOC (struct edit);
793 e->type = CHANGE;
794 e->i1 = i;
796 i++;
797 while (i < n1 && ctxt.index_mapping[i] < 0);
798 e->i2 = i - 1;
799 e->j1 = j;
801 j++;
802 while (j < n2 && ctxt.index_mapping_reverse[j] < 0);
803 e->j2 = j - 1;
804 gl_list_add_last (edits, e);
809 result->index_mapping = ctxt.index_mapping;
810 result->index_mapping_reverse = ctxt.index_mapping_reverse;
811 result->num_edits = gl_list_size (edits);
812 result->edits = XNMALLOC (result->num_edits, struct edit *);
814 idx_t index = 0;
815 gl_list_iterator_t iter = gl_list_iterator (edits);
816 const void *elt;
817 gl_list_node_t node;
818 while (gl_list_iterator_next (&iter, &elt, &node))
819 result->edits[index++] = (struct edit *) elt;
820 gl_list_iterator_free (&iter);
821 ASSERT (index == result->num_edits);
825 /* An empty entry. */
826 static struct entry empty_entry = { NULL, 0 };
828 /* Return the end a paragraph.
829 ENTRY is an entry.
830 OFFSET is an offset into the entry, OFFSET <= ENTRY->length.
831 Return the offset of the end of paragraph, as an offset <= ENTRY->length;
832 it is the start of a blank line or the end of the entry. */
833 static idx_t
834 find_paragraph_end (const struct entry *entry, idx_t offset)
836 const char *string = entry->string;
837 idx_t length = entry->length;
839 for (;;)
841 const char *nl = memchr (string + offset, '\n', length - offset);
842 if (nl == NULL)
843 return length;
844 offset = (nl - string) + 1;
845 if (offset < length && string[offset] == '\n')
846 return offset;
850 /* Split a merged entry.
851 Given an old entry of the form
852 TITLE
853 BODY
854 and a new entry of the form
855 TITLE
856 BODY1
857 BODY'
858 where the two titles are the same and BODY and BODY' are very similar,
859 this computes two new entries
860 TITLE
861 BODY1
863 TITLE
864 BODY'
865 and returns true.
866 If the entries don't have this form, it returns false. */
867 static bool
868 try_split_merged_entry (const struct entry *old_entry,
869 const struct entry *new_entry,
870 struct entry *new_split[2])
872 idx_t old_title_len = find_paragraph_end (old_entry, 0);
873 idx_t new_title_len = find_paragraph_end (new_entry, 0);
874 struct entry old_body;
875 struct entry new_body;
876 idx_t best_split_offset;
877 double best_similarity;
878 idx_t split_offset;
880 /* Same title? */
881 if (!(old_title_len == new_title_len
882 && memcmp (old_entry->string, new_entry->string, old_title_len) == 0))
883 return false;
885 old_body.string = old_entry->string + old_title_len;
886 old_body.length = old_entry->length - old_title_len;
888 /* Determine where to split the new entry.
889 This is done by maximizing the similarity between BODY and BODY'. */
890 best_split_offset = split_offset = new_title_len;
891 best_similarity = 0.0;
892 for (;;)
894 double similarity;
896 new_body.string = new_entry->string + split_offset;
897 new_body.length = new_entry->length - split_offset;
898 similarity =
899 entry_fstrcmp (&old_body, &new_body, best_similarity);
900 if (similarity > best_similarity)
902 best_split_offset = split_offset;
903 best_similarity = similarity;
905 if (best_similarity == 1.0)
906 /* It cannot get better. */
907 break;
909 if (split_offset < new_entry->length)
910 split_offset = find_paragraph_end (new_entry, split_offset + 1);
911 else
912 break;
915 /* BODY' should not be empty. */
916 if (best_split_offset == new_entry->length)
917 return false;
918 ASSERT (new_entry->string[best_split_offset] == '\n');
920 /* A certain similarity between BODY and BODY' is required. */
921 if (best_similarity < FSTRCMP_STRICTER_THRESHOLD)
922 return false;
924 new_split[0] = entry_create (new_entry->string, best_split_offset + 1);
927 idx_t len1 = new_title_len;
928 idx_t len2 = new_entry->length - best_split_offset;
929 char *combined = XNMALLOC (len1 + len2, char);
930 memcpy (combined, new_entry->string, len1);
931 memcpy (combined + len1, new_entry->string + best_split_offset, len2);
932 new_split[1] = entry_create (combined, len1 + len2);
935 return true;
938 /* Write the contents of an entry to the output stream FP. */
939 static void
940 entry_write (FILE *fp, struct entry *entry)
942 if (entry->length > 0)
943 fwrite (entry->string, 1, entry->length, fp);
946 /* This structure represents a conflict.
947 A conflict can occur for various reasons. */
948 struct conflict
950 /* Parts from the ancestor file. */
951 idx_t num_old_entries;
952 struct entry **old_entries;
953 /* Parts of the modified file. */
954 idx_t num_modified_entries;
955 struct entry **modified_entries;
958 /* Write a conflict to the output stream FP, including markers. */
959 static void
960 conflict_write (FILE *fp, struct conflict *c)
962 idx_t i;
964 /* Use the same syntax as git's default merge driver.
965 The spaces after <<<<<<< and >>>>>>> are for compatibility with
966 git/rerere.c, function 'is_cmarker'. Usually they would be followed by
967 branch or version names, but this info is not available to us here.
968 Don't indent the contents of the entries (with things like ">" or "-"),
969 otherwise the user needs more textual editing to resolve the conflict. */
970 fputs ("<<<<<<< \n", fp);
971 for (i = 0; i < c->num_old_entries; i++)
972 entry_write (fp, c->old_entries[i]);
973 fputs ("=======\n", fp);
974 for (i = 0; i < c->num_modified_entries; i++)
975 entry_write (fp, c->modified_entries[i]);
976 fputs (">>>>>>> \n", fp);
979 /* Long options. */
980 static const struct option long_options[] =
982 { "help", no_argument, NULL, 'h' },
983 { "split-merged-entry", no_argument, NULL, CHAR_MAX + 1 },
984 { "version", no_argument, NULL, 'V' },
985 { NULL, 0, NULL, 0 }
988 /* Print a usage message and exit. */
989 static void
990 usage (int status)
992 if (status != EXIT_SUCCESS)
993 fprintf (stderr, "Try '%s --help' for more information.\n",
994 getprogname ());
995 else
997 printf ("Usage: %s [OPTION] O-FILE-NAME A-FILE-NAME B-FILE-NAME\n",
998 getprogname ());
999 printf ("\n");
1000 printf ("Merges independent modifications of a ChangeLog style file.\n");
1001 printf ("O-FILE-NAME names the original file, the ancestor of the two others.\n");
1002 printf ("A-FILE-NAME names the publicly modified file.\n");
1003 printf ("B-FILE-NAME names the user-modified file.\n");
1004 printf ("Writes the merged file into A-FILE-NAME.\n");
1005 printf ("\n");
1006 #if 0 /* --split-merged-entry is now on by default. */
1007 printf ("Operation modifiers:\n");
1008 printf ("\
1009 --split-merged-entry Possibly split a merged entry between paragraphs.\n\
1010 Use this if you have the habit to merge unrelated\n\
1011 entries into a single one, separated only by a\n\
1012 newline, just because they happened on the same\n\
1013 date.\n");
1014 printf ("\n");
1015 #endif
1016 printf ("Informative output:\n");
1017 printf (" -h, --help display this help and exit\n");
1018 printf (" -V, --version output version information and exit\n");
1019 printf ("\n");
1020 fputs ("Report bugs to <bug-gnulib@gnu.org>.\n",
1021 stdout);
1024 exit (status);
1028 main (int argc, char *argv[])
1030 int optchar;
1031 bool do_help;
1032 bool do_version;
1033 bool split_merged_entry;
1035 /* Set default values for variables. */
1036 do_help = false;
1037 do_version = false;
1038 split_merged_entry = true;
1040 /* Parse command line options. */
1041 while ((optchar = getopt_long (argc, argv, "hV", long_options, NULL)) != EOF)
1042 switch (optchar)
1044 case '\0': /* Long option. */
1045 break;
1046 case 'h':
1047 do_help = true;
1048 break;
1049 case 'V':
1050 do_version = true;
1051 break;
1052 case CHAR_MAX + 1: /* --split-merged-entry */
1053 break;
1054 default:
1055 usage (EXIT_FAILURE);
1058 if (do_version)
1060 /* Version information is requested. */
1061 printf ("%s\n", getprogname ());
1062 printf ("Copyright (C) %s Free Software Foundation, Inc.\n\
1063 License GPLv2+: GNU GPL version 2 or later <https://gnu.org/licenses/gpl.html>\n\
1064 This is free software: you are free to change and redistribute it.\n\
1065 There is NO WARRANTY, to the extent permitted by law.\n\
1067 "2020");
1068 printf ("Written by %s.\n", "Bruno Haible");
1069 exit (EXIT_SUCCESS);
1072 if (do_help)
1074 /* Help is requested. */
1075 usage (EXIT_SUCCESS);
1078 /* Test argument count. */
1079 if (optind + 3 != argc)
1080 error (EXIT_FAILURE, 0, "expected three arguments");
1083 const char *ancestor_file_name; /* O-FILE-NAME */
1084 const char *destination_file_name; /* A-FILE-NAME */
1085 bool downstream;
1086 const char *other_file_name; /* B-FILE-NAME */
1087 const char *mainstream_file_name;
1088 const char *modified_file_name;
1089 struct changelog_file ancestor_file;
1090 struct changelog_file mainstream_file;
1091 struct changelog_file modified_file;
1092 /* Mapping from indices in ancestor_file to indices in mainstream_file. */
1093 struct entries_mapping mapping;
1094 struct differences diffs;
1095 gl_list_node_t *result_entries_pointers; /* array of pointers into result_entries */
1096 gl_list_t /* <struct entry *> */ result_entries;
1097 gl_list_t /* <struct conflict *> */ result_conflicts;
1099 ancestor_file_name = argv[optind];
1100 destination_file_name = argv[optind + 1];
1101 other_file_name = argv[optind + 2];
1103 /* Heuristic to determine whether it's a pull in downstream direction
1104 (e.g. pull from a centralized server) or a pull in upstream direction
1105 (e.g. "git stash apply").
1107 For ChangeLog this distinction is important. The difference between
1108 an "upstream" and a "downstream" repository is that more people are
1109 looking at the "upstream" repository. They want to be informed about
1110 changes and expect them to be shown at the top of the ChangeLog.
1111 When a user pulls downstream, on the other hand, he has two options:
1112 a) He gets the change entries from the central repository also at the
1113 top of his ChangeLog, and his own changes come after them.
1114 b) He gets the change entries from the central repository after those
1115 he has collected for his branch. His own change entries stay at
1116 the top of the ChangeLog file.
1117 In the case a) he has to reorder the ChangeLog before he can commit.
1118 No one does that. So most people want b).
1119 In other words, the order of entries in a ChangeLog should represent
1120 the order in which they have flown (or will flow) into the *central*
1121 repository.
1123 But in git this is fundamentally indistinguishable, because when Linus
1124 pulls patches from akpm and akpm pulls patches from Linus, it's not
1125 clear which of the two is more "upstream". Also, when you have many
1126 branches in a repository and pull from one to another, "git" has no way
1127 to know which branch is more "upstream" than the other. The git-tag(1)
1128 manual page also says:
1129 "One important aspect of git is it is distributed, and being
1130 distributed largely means there is no inherent "upstream" or
1131 "downstream" in the system."
1132 Therefore anyone who attempts to produce a ChangeLog from the merge
1133 history will fail.
1135 Here we allow the user to specify the pull direction through an
1136 environment variable (GIT_UPSTREAM or GIT_DOWNSTREAM). If these two
1137 environment variables are not set, we assume a "simple single user"
1138 usage pattern: He manages local changes through stashes and uses
1139 "git pull" only to pull downstream.
1141 How to distinguish these situation? There are several hints:
1142 - During a "git stash apply", GIT_REFLOG_ACTION is not set. During
1143 a "git pull", it is set to 'pull '. During a "git pull --rebase",
1144 it is set to 'pull --rebase'. During a "git cherry-pick", it is
1145 set to 'cherry-pick'.
1146 - During a "git stash apply", there is an environment variable of
1147 the form GITHEAD_<40_hex_digits>='Stashed changes'. */
1149 const char *var;
1151 var = getenv ("GIT_DOWNSTREAM");
1152 if (var != NULL && var[0] != '\0')
1153 downstream = true;
1154 else
1156 var = getenv ("GIT_UPSTREAM");
1157 if (var != NULL && var[0] != '\0')
1158 downstream = false;
1159 else
1161 var = getenv ("GIT_REFLOG_ACTION");
1162 #if 0 /* Debugging code */
1163 printf ("GIT_REFLOG_ACTION=|%s|\n", var);
1164 #endif
1165 if (var != NULL
1166 && ((strncmp (var, "pull", 4) == 0
1167 && c_strstr (var, " --rebase") == NULL)
1168 || strncmp (var, "merge origin", 12) == 0))
1169 downstream = true;
1170 else
1172 /* "git stash apply", "git rebase", "git cherry-pick" and
1173 similar. */
1174 downstream = false;
1180 #if 0 /* Debugging code */
1182 char buf[1000];
1183 printf ("First line of %%A:\n");
1184 sprintf (buf, "head -n 1 %s", destination_file_name); system (buf);
1185 printf ("First line of %%B:\n");
1186 sprintf (buf, "head -n 1 %s", other_file_name); system (buf);
1187 printf ("Guessing calling convention: %s\n",
1188 downstream
1189 ? "%A = modified by user, %B = upstream"
1190 : "%A = upstream, %B = modified by user");
1192 #endif
1194 if (downstream)
1196 mainstream_file_name = other_file_name;
1197 modified_file_name = destination_file_name;
1199 else
1201 mainstream_file_name = destination_file_name;
1202 modified_file_name = other_file_name;
1205 /* Read the three files into memory. */
1206 read_changelog_file (ancestor_file_name, &ancestor_file);
1207 read_changelog_file (mainstream_file_name, &mainstream_file);
1208 read_changelog_file (modified_file_name, &modified_file);
1210 /* Compute correspondence between the entries of ancestor_file and of
1211 mainstream_file. */
1212 compute_mapping (&ancestor_file, &mainstream_file, false, &mapping);
1213 (void) entries_mapping_reverse_get; /* avoid gcc "defined but not" warning */
1215 /* Compute differences between the entries of ancestor_file and of
1216 modified_file. */
1217 compute_differences (&ancestor_file, &modified_file, &diffs);
1219 /* Compute the result. */
1220 result_entries_pointers =
1221 XNMALLOC (mainstream_file.num_entries, gl_list_node_t);
1222 result_entries =
1223 gl_list_create_empty (GL_LINKED_LIST, entry_equals, entry_hashcode,
1224 NULL, true);
1226 idx_t k;
1227 for (k = 0; k < mainstream_file.num_entries; k++)
1228 result_entries_pointers[k] =
1229 gl_list_add_last (result_entries, mainstream_file.entries[k]);
1231 result_conflicts =
1232 gl_list_create_empty (GL_ARRAY_LIST, NULL, NULL, NULL, true);
1234 idx_t e;
1235 for (e = 0; e < diffs.num_edits; e++)
1237 struct edit *edit = diffs.edits[e];
1238 switch (edit->type)
1240 case ADDITION:
1241 if (edit->j1 == 0)
1243 /* An addition to the top of modified_file.
1244 Apply it to the top of mainstream_file. */
1245 idx_t j;
1246 for (j = edit->j2; j > edit->j1; )
1248 j--;
1249 struct entry *added_entry = modified_file.entries[j];
1250 gl_list_add_first (result_entries, added_entry);
1253 else
1255 idx_t i_before;
1256 idx_t i_after;
1257 ptrdiff_t k_before;
1258 ptrdiff_t k_after;
1259 i_before = diffs.index_mapping_reverse[edit->j1 - 1];
1260 ASSERT (i_before >= 0);
1261 i_after = (edit->j2 + 1 == modified_file.num_entries
1262 ? ancestor_file.num_entries
1263 : diffs.index_mapping_reverse[edit->j2 + 1]);
1264 ASSERT (i_after >= 0);
1265 ASSERT (i_after == i_before + 1);
1266 /* An addition between ancestor_file.entries[i_before] and
1267 ancestor_file.entries[i_after]. See whether these two
1268 entries still exist in mainstream_file and are still
1269 consecutive. */
1270 k_before = entries_mapping_get (&mapping, i_before);
1271 k_after = (i_after == ancestor_file.num_entries
1272 ? mainstream_file.num_entries
1273 : entries_mapping_get (&mapping, i_after));
1274 if (k_before >= 0 && k_after >= 0 && k_after == k_before + 1)
1276 /* Yes, the entry before and after are still neighbours
1277 in mainstream_file. Apply the addition between
1278 them. */
1279 if (k_after == mainstream_file.num_entries)
1281 idx_t j;
1282 for (j = edit->j1; j <= edit->j2; j++)
1284 struct entry *added_entry = modified_file.entries[j];
1285 gl_list_add_last (result_entries, added_entry);
1288 else
1290 gl_list_node_t node_k_after = result_entries_pointers[k_after];
1291 idx_t j;
1292 for (j = edit->j1; j <= edit->j2; j++)
1294 struct entry *added_entry = modified_file.entries[j];
1295 gl_list_add_before (result_entries, node_k_after, added_entry);
1299 else
1301 /* It's not clear where the additions should be applied.
1302 Let the user decide. */
1303 struct conflict *c = XMALLOC (struct conflict);
1304 idx_t j;
1305 c->num_old_entries = 0;
1306 c->old_entries = NULL;
1307 c->num_modified_entries = edit->j2 - edit->j1 + 1;
1308 c->modified_entries =
1309 XNMALLOC (c->num_modified_entries, struct entry *);
1310 for (j = edit->j1; j <= edit->j2; j++)
1311 c->modified_entries[j - edit->j1] = modified_file.entries[j];
1312 gl_list_add_last (result_conflicts, c);
1315 break;
1316 case REMOVAL:
1318 /* Apply the removals one by one. */
1319 idx_t i;
1320 for (i = edit->i1; i <= edit->i2; i++)
1322 struct entry *removed_entry = ancestor_file.entries[i];
1323 ptrdiff_t k = entries_mapping_get (&mapping, i);
1324 if (k >= 0
1325 && entry_equals (removed_entry,
1326 mainstream_file.entries[k]))
1328 /* The entry to be removed still exists in
1329 mainstream_file. Remove it. */
1330 gl_list_node_set_value (result_entries,
1331 result_entries_pointers[k],
1332 &empty_entry);
1334 else
1336 /* The entry to be removed was already removed or was
1337 modified. This is a conflict. */
1338 struct conflict *c = XMALLOC (struct conflict);
1339 c->num_old_entries = 1;
1340 c->old_entries =
1341 XNMALLOC (c->num_old_entries, struct entry *);
1342 c->old_entries[0] = removed_entry;
1343 c->num_modified_entries = 0;
1344 c->modified_entries = NULL;
1345 gl_list_add_last (result_conflicts, c);
1349 break;
1350 case CHANGE:
1352 bool done = false;
1353 /* When the user usually merges entries from the same day,
1354 and this edit is at the top of the file: */
1355 if (split_merged_entry && edit->j1 == 0)
1357 /* Test whether the change is "simple merged", i.e. whether
1358 it consists of additions, followed by an augmentation of
1359 the first changed entry, followed by small changes of the
1360 remaining entries:
1361 entry_1
1362 entry_2
1364 entry_n
1365 are mapped to
1366 added_entry
1368 added_entry
1369 augmented_entry_1
1370 modified_entry_2
1372 modified_entry_n. */
1373 if (edit->i2 - edit->i1 <= edit->j2 - edit->j1)
1375 struct entry *split[2];
1376 bool simple_merged =
1377 try_split_merged_entry (ancestor_file.entries[edit->i1],
1378 modified_file.entries[edit->i1 + edit->j2 - edit->i2],
1379 split);
1380 if (simple_merged)
1382 idx_t i;
1383 for (i = edit->i1 + 1; i <= edit->i2; i++)
1384 if (entry_fstrcmp (ancestor_file.entries[i],
1385 modified_file.entries[i + edit->j2 - edit->i2],
1386 FSTRCMP_THRESHOLD)
1387 < FSTRCMP_THRESHOLD)
1389 simple_merged = false;
1390 break;
1393 if (simple_merged)
1395 /* Apply the additions at the top of modified_file.
1396 Apply each of the single-entry changes
1397 separately. */
1398 idx_t num_changed = edit->i2 - edit->i1 + 1; /* > 0 */
1399 idx_t num_added = (edit->j2 - edit->j1 + 1) - num_changed;
1400 idx_t j;
1401 /* First part of the split modified_file.entries[edit->j2 - edit->i2 + edit->i1]: */
1402 gl_list_add_first (result_entries, split[0]);
1403 /* The additions. */
1404 for (j = edit->j1 + num_added; j > edit->j1; )
1406 j--;
1407 struct entry *added_entry = modified_file.entries[j];
1408 gl_list_add_first (result_entries, added_entry);
1410 /* Now the single-entry changes. */
1411 for (j = edit->j1 + num_added; j <= edit->j2; j++)
1413 struct entry *changed_entry =
1414 (j == edit->j1 + num_added
1415 ? split[1]
1416 : modified_file.entries[j]);
1417 idx_t i = j + edit->i2 - edit->j2;
1418 ptrdiff_t k = entries_mapping_get (&mapping, i);
1419 if (k >= 0
1420 && entry_equals (ancestor_file.entries[i],
1421 mainstream_file.entries[k]))
1423 gl_list_node_set_value (result_entries,
1424 result_entries_pointers[k],
1425 changed_entry);
1427 else if (!entry_equals (ancestor_file.entries[i],
1428 changed_entry))
1430 struct conflict *c = XMALLOC (struct conflict);
1431 c->num_old_entries = 1;
1432 c->old_entries =
1433 XNMALLOC (c->num_old_entries, struct entry *);
1434 c->old_entries[0] = ancestor_file.entries[i];
1435 c->num_modified_entries = 1;
1436 c->modified_entries =
1437 XNMALLOC (c->num_modified_entries, struct entry *);
1438 c->modified_entries[0] = changed_entry;
1439 gl_list_add_last (result_conflicts, c);
1442 done = true;
1446 if (!done)
1448 bool simple;
1449 /* Test whether the change is "simple", i.e. whether it
1450 consists of small changes to the old ChangeLog entries
1451 and additions before them:
1452 entry_1
1454 entry_n
1455 are mapped to
1456 added_entry
1458 added_entry
1459 modified_entry_1
1461 modified_entry_n. */
1462 if (edit->i2 - edit->i1 <= edit->j2 - edit->j1)
1464 idx_t i;
1465 simple = true;
1466 for (i = edit->i1; i <= edit->i2; i++)
1467 if (entry_fstrcmp (ancestor_file.entries[i],
1468 modified_file.entries[i + edit->j2 - edit->i2],
1469 FSTRCMP_THRESHOLD)
1470 < FSTRCMP_THRESHOLD)
1472 simple = false;
1473 break;
1476 else
1477 simple = false;
1478 if (simple)
1480 /* Apply the additions and each of the single-entry
1481 changes separately. */
1482 idx_t num_changed = edit->i2 - edit->i1 + 1; /* > 0 */
1483 idx_t num_added = (edit->j2 - edit->j1 + 1) - num_changed;
1484 if (edit->j1 == 0)
1486 /* A simple change at the top of modified_file.
1487 Apply it to the top of mainstream_file. */
1488 idx_t j;
1489 for (j = edit->j1 + num_added; j > edit->j1; )
1491 j--;
1492 struct entry *added_entry = modified_file.entries[j];
1493 gl_list_add_first (result_entries, added_entry);
1495 for (j = edit->j1 + num_added; j <= edit->j2; j++)
1497 struct entry *changed_entry = modified_file.entries[j];
1498 idx_t i = j + edit->i2 - edit->j2;
1499 ptrdiff_t k = entries_mapping_get (&mapping, i);
1500 if (k >= 0
1501 && entry_equals (ancestor_file.entries[i],
1502 mainstream_file.entries[k]))
1504 gl_list_node_set_value (result_entries,
1505 result_entries_pointers[k],
1506 changed_entry);
1508 else
1510 struct conflict *c;
1511 ASSERT (!entry_equals (ancestor_file.entries[i],
1512 changed_entry));
1513 c = XMALLOC (struct conflict);
1514 c->num_old_entries = 1;
1515 c->old_entries =
1516 XNMALLOC (c->num_old_entries, struct entry *);
1517 c->old_entries[0] = ancestor_file.entries[i];
1518 c->num_modified_entries = 1;
1519 c->modified_entries =
1520 XNMALLOC (c->num_modified_entries, struct entry *);
1521 c->modified_entries[0] = changed_entry;
1522 gl_list_add_last (result_conflicts, c);
1525 done = true;
1527 else
1529 idx_t i_before;
1530 ptrdiff_t k_before;
1531 bool linear;
1532 i_before = diffs.index_mapping_reverse[edit->j1 - 1];
1533 ASSERT (i_before >= 0);
1534 /* A simple change after ancestor_file.entries[i_before].
1535 See whether this entry and the following num_changed
1536 entries still exist in mainstream_file and are still
1537 consecutive. */
1538 k_before = entries_mapping_get (&mapping, i_before);
1539 linear = (k_before >= 0);
1540 if (linear)
1542 idx_t i;
1543 for (i = i_before + 1; i <= i_before + num_changed; i++)
1544 if (entries_mapping_get (&mapping, i) != k_before + (i - i_before))
1546 linear = false;
1547 break;
1550 if (linear)
1552 gl_list_node_t node_for_insert =
1553 result_entries_pointers[k_before + 1];
1554 idx_t j;
1555 for (j = edit->j1 + num_added; j > edit->j1; )
1557 j--;
1558 struct entry *added_entry = modified_file.entries[j];
1559 gl_list_add_before (result_entries, node_for_insert, added_entry);
1561 for (j = edit->j1 + num_added; j <= edit->j2; j++)
1563 struct entry *changed_entry = modified_file.entries[j];
1564 idx_t i = j + edit->i2 - edit->j2;
1565 idx_t k = entries_mapping_get (&mapping, i);
1566 ASSERT (k >= 0);
1567 if (entry_equals (ancestor_file.entries[i],
1568 mainstream_file.entries[k]))
1570 gl_list_node_set_value (result_entries,
1571 result_entries_pointers[k],
1572 changed_entry);
1574 else
1576 struct conflict *c;
1577 ASSERT (!entry_equals (ancestor_file.entries[i],
1578 changed_entry));
1579 c = XMALLOC (struct conflict);
1580 c->num_old_entries = 1;
1581 c->old_entries =
1582 XNMALLOC (c->num_old_entries, struct entry *);
1583 c->old_entries[0] = ancestor_file.entries[i];
1584 c->num_modified_entries = 1;
1585 c->modified_entries =
1586 XNMALLOC (c->num_modified_entries, struct entry *);
1587 c->modified_entries[0] = changed_entry;
1588 gl_list_add_last (result_conflicts, c);
1591 done = true;
1595 else
1597 /* A big change.
1598 See whether the num_changed entries still exist
1599 unchanged in mainstream_file and are still
1600 consecutive. */
1601 idx_t i_first;
1602 ptrdiff_t k_first;
1603 bool linear_unchanged;
1604 i_first = edit->i1;
1605 k_first = entries_mapping_get (&mapping, i_first);
1606 linear_unchanged =
1607 (k_first >= 0
1608 && entry_equals (ancestor_file.entries[i_first],
1609 mainstream_file.entries[k_first]));
1610 if (linear_unchanged)
1612 idx_t i;
1613 for (i = i_first + 1; i <= edit->i2; i++)
1614 if (!(entries_mapping_get (&mapping, i) == k_first + (i - i_first)
1615 && entry_equals (ancestor_file.entries[i],
1616 mainstream_file.entries[entries_mapping_get (&mapping, i)])))
1618 linear_unchanged = false;
1619 break;
1622 if (linear_unchanged)
1624 gl_list_node_t node_for_insert =
1625 result_entries_pointers[k_first];
1626 idx_t j;
1627 idx_t i;
1628 for (j = edit->j2; j > edit->j1; )
1630 j--;
1631 struct entry *new_entry = modified_file.entries[j];
1632 gl_list_add_before (result_entries, node_for_insert, new_entry);
1634 for (i = edit->i1; i <= edit->i2; i++)
1636 idx_t k = entries_mapping_get (&mapping, i);
1637 ASSERT (k >= 0);
1638 ASSERT (entry_equals (ancestor_file.entries[i],
1639 mainstream_file.entries[k]));
1640 gl_list_node_set_value (result_entries,
1641 result_entries_pointers[k],
1642 &empty_entry);
1644 done = true;
1648 if (!done)
1650 struct conflict *c = XMALLOC (struct conflict);
1651 idx_t i, j;
1652 c->num_old_entries = edit->i2 - edit->i1 + 1;
1653 c->old_entries =
1654 XNMALLOC (c->num_old_entries, struct entry *);
1655 for (i = edit->i1; i <= edit->i2; i++)
1656 c->old_entries[i - edit->i1] = ancestor_file.entries[i];
1657 c->num_modified_entries = edit->j2 - edit->j1 + 1;
1658 c->modified_entries =
1659 XNMALLOC (c->num_modified_entries, struct entry *);
1660 for (j = edit->j1; j <= edit->j2; j++)
1661 c->modified_entries[j - edit->j1] = modified_file.entries[j];
1662 gl_list_add_last (result_conflicts, c);
1665 break;
1670 /* Output the result. */
1672 FILE *fp = fopen (destination_file_name, "w");
1673 if (fp == NULL)
1675 fprintf (stderr, "could not write file '%s'\n", destination_file_name);
1676 exit (EXIT_FAILURE);
1679 /* Output the conflicts at the top. */
1681 idx_t n = gl_list_size (result_conflicts);
1682 idx_t i;
1683 for (i = 0; i < n; i++)
1684 conflict_write (fp, (struct conflict *) gl_list_get_at (result_conflicts, i));
1686 /* Output the modified and unmodified entries, in order. */
1688 gl_list_iterator_t iter = gl_list_iterator (result_entries);
1689 const void *elt;
1690 gl_list_node_t node;
1691 while (gl_list_iterator_next (&iter, &elt, &node))
1692 entry_write (fp, (struct entry *) elt);
1693 gl_list_iterator_free (&iter);
1696 if (fwriteerror (fp))
1698 fprintf (stderr, "error writing to file '%s'\n", destination_file_name);
1699 exit (EXIT_FAILURE);
1703 exit (gl_list_size (result_conflicts) > 0 ? EXIT_FAILURE : EXIT_SUCCESS);