2 * diff_file.c : routines for doing diffs on files
4 * ====================================================================
5 * Copyright (c) 2000-2006 CollabNet. All rights reserved.
7 * This software is licensed as described in the file COPYING, which
8 * you should have received as part of this distribution. The terms
9 * are also available at http://subversion.tigris.org/license-1.html.
10 * If newer versions of this license are posted there, you may use a
11 * newer version instead, at your option.
13 * This software consists of voluntary contributions made by many
14 * individuals. For exact contribution history, see the revision
15 * history and logs, available at http://subversion.tigris.org/.
16 * ====================================================================
21 #include <apr_pools.h>
22 #include <apr_general.h>
23 #include <apr_file_io.h>
24 #include <apr_file_info.h>
27 #include <apr_getopt.h>
29 #include "svn_error.h"
31 #include "svn_types.h"
32 #include "svn_string.h"
35 #include "svn_pools.h"
37 #include "svn_private_config.h"
39 #include "svn_ctype.h"
42 /* A token, i.e. a line read from a file. */
43 typedef struct svn_diff__file_token_t
45 /* Next token in free list. */
46 struct svn_diff__file_token_t
*next
;
47 svn_diff_datasource_e datasource
;
48 /* Offset in the datasource. */
50 /* Offset of the normalized token (may skip leading whitespace) */
51 apr_off_t norm_offset
;
52 /* Total length - before normalization. */
54 /* Total length - after normalization. */
56 } svn_diff__file_token_t
;
59 typedef struct svn_diff__file_baton_t
61 const svn_diff_file_options_t
*options
;
72 /* List of free tokens that may be reused. */
73 svn_diff__file_token_t
*tokens
;
75 svn_diff__normalize_state_t normalize_state
[4];
78 } svn_diff__file_baton_t
;
81 /* Look for the start of an end-of-line sequence (i.e. CR or LF)
82 * in the array pointed to by BUF, of length LEN.
83 * If such a byte is found, return the pointer to it, else return NULL.
86 find_eol_start(char *buf
, apr_size_t len
)
88 for (; len
> 0; ++buf
, --len
)
90 if (*buf
== '\n' || *buf
== '\r')
97 datasource_to_index(svn_diff_datasource_e datasource
)
101 case svn_diff_datasource_original
:
104 case svn_diff_datasource_modified
:
107 case svn_diff_datasource_latest
:
110 case svn_diff_datasource_ancestor
:
/* Files are read in chunks of 128k.  This number is arbitrary: nothing
 * backs it up.  If someone comes up with a number that has some
 * argumentation behind it, let's use that.
 */
#define CHUNK_SHIFT 17
#define CHUNK_SIZE (1 << CHUNK_SHIFT)

/* Convert between a chunk index and a byte offset in the file, and
 * extract the position of a byte offset within its chunk. */
#define chunk_to_offset(chunk) ((chunk) << CHUNK_SHIFT)
#define offset_to_chunk(offset) ((offset) >> CHUNK_SHIFT)
#define offset_in_chunk(offset) ((offset) & (CHUNK_SIZE - 1))
129 /* Read a chunk from a FILE into BUFFER, starting from OFFSET, going for
130 * *LENGTH. The actual bytes read are stored in *LENGTH on return.
132 static APR_INLINE svn_error_t
*
133 read_chunk(apr_file_t
*file
, const char *path
,
134 char *buffer
, apr_size_t length
,
135 apr_off_t offset
, apr_pool_t
*pool
)
137 /* XXX: The final offset may not be the one we asked for.
140 SVN_ERR(svn_io_file_seek(file
, APR_SET
, &offset
, pool
));
141 SVN_ERR(svn_io_file_read_full(file
, buffer
, length
, NULL
, pool
));
147 /* Map or read a file at PATH. *BUFFER will point to the file
148 * contents; if the file was mapped, *FILE and *MM will contain the
149 * mmap context; otherwise they will be NULL. SIZE will contain the
150 * file size. Allocate from POOL.
153 #define MMAP_T_PARAM(NAME) apr_mmap_t **NAME,
154 #define MMAP_T_ARG(NAME) &(NAME),
156 #define MMAP_T_PARAM(NAME)
157 #define MMAP_T_ARG(NAME)
161 map_or_read_file(apr_file_t
**file
,
163 char **buffer
, apr_off_t
*size
,
164 const char *path
, apr_pool_t
*pool
)
171 SVN_ERR(svn_io_file_open(file
, path
, APR_READ
, APR_OS_DEFAULT
, pool
));
172 SVN_ERR(svn_io_file_info_get(&finfo
, APR_FINFO_SIZE
, *file
, pool
));
175 if (finfo
.size
> APR_MMAP_THRESHOLD
)
177 rv
= apr_mmap_create(mm
, *file
, 0, finfo
.size
, APR_MMAP_READ
, pool
);
178 if (rv
== APR_SUCCESS
)
183 /* On failure we just fall through and try reading the file into
187 #endif /* APR_HAS_MMAP */
189 if (*buffer
== NULL
&& finfo
.size
> 0)
191 *buffer
= apr_palloc(pool
, finfo
.size
);
193 SVN_ERR(svn_io_file_read_full(*file
, *buffer
, finfo
.size
, NULL
, pool
));
195 /* Since we have the entire contents of the file we can
198 SVN_ERR(svn_io_file_close(*file
, pool
));
209 /* Implements svn_diff_fns_t::datasource_open */
211 datasource_open(void *baton
, svn_diff_datasource_e datasource
)
213 svn_diff__file_baton_t
*file_baton
= baton
;
220 idx
= datasource_to_index(datasource
);
222 SVN_ERR(svn_io_file_open(&file_baton
->file
[idx
], file_baton
->path
[idx
],
223 APR_READ
, APR_OS_DEFAULT
, file_baton
->pool
));
225 SVN_ERR(svn_io_file_info_get(&finfo
, APR_FINFO_SIZE
,
226 file_baton
->file
[idx
], file_baton
->pool
));
228 file_baton
->size
[idx
] = finfo
.size
;
229 length
= finfo
.size
> CHUNK_SIZE
? CHUNK_SIZE
: finfo
.size
;
234 endp
= curp
= apr_palloc(file_baton
->pool
, length
);
237 file_baton
->buffer
[idx
] = file_baton
->curp
[idx
] = curp
;
238 file_baton
->endp
[idx
] = endp
;
240 SVN_ERR(read_chunk(file_baton
->file
[idx
], file_baton
->path
[idx
],
241 curp
, length
, 0, file_baton
->pool
));
247 /* Implements svn_diff_fns_t::datasource_close */
249 datasource_close(void *baton
, svn_diff_datasource_e datasource
)
251 /* Do nothing. The compare_token function needs previous datasources
252 * to stay available until all datasources are processed.
258 /* Implements svn_diff_fns_t::datasource_get_next_token */
260 datasource_get_next_token(apr_uint32_t
*hash
, void **token
, void *baton
,
261 svn_diff_datasource_e datasource
)
263 svn_diff__file_baton_t
*file_baton
= baton
;
264 svn_diff__file_token_t
*file_token
;
272 /* Did the last chunk end in a CR character? */
273 svn_boolean_t had_cr
= FALSE
;
277 idx
= datasource_to_index(datasource
);
279 curp
= file_baton
->curp
[idx
];
280 endp
= file_baton
->endp
[idx
];
282 last_chunk
= offset_to_chunk(file_baton
->size
[idx
]);
285 && last_chunk
== file_baton
->chunk
[idx
])
290 /* Get a new token */
291 file_token
= file_baton
->tokens
;
294 file_baton
->tokens
= file_token
->next
;
298 file_token
= apr_palloc(file_baton
->pool
, sizeof(*file_token
));
301 file_token
->datasource
= datasource
;
302 file_token
->offset
= chunk_to_offset(file_baton
->chunk
[idx
])
303 + (curp
- file_baton
->buffer
[idx
]);
304 file_token
->raw_length
= 0;
305 file_token
->length
= 0;
309 eol
= find_eol_start(curp
, endp
- curp
);
312 had_cr
= (*eol
== '\r');
314 /* If we have the whole eol sequence in the chunk... */
315 if (!had_cr
|| eol
!= endp
)
317 if (had_cr
&& *eol
== '\n')
323 if (file_baton
->chunk
[idx
] == last_chunk
)
329 length
= endp
- curp
;
330 file_token
->raw_length
+= length
;
331 svn_diff__normalize_buffer(&curp
, &length
,
332 &file_baton
->normalize_state
[idx
],
333 curp
, file_baton
->options
);
334 file_token
->length
+= length
;
335 h
= svn_diff__adler32(h
, curp
, length
);
337 curp
= endp
= file_baton
->buffer
[idx
];
338 file_baton
->chunk
[idx
]++;
339 length
= file_baton
->chunk
[idx
] == last_chunk
?
340 offset_in_chunk(file_baton
->size
[idx
]) : CHUNK_SIZE
;
342 file_baton
->endp
[idx
] = endp
;
344 SVN_ERR(read_chunk(file_baton
->file
[idx
], file_baton
->path
[idx
],
346 chunk_to_offset(file_baton
->chunk
[idx
]),
349 /* If the last chunk ended in a CR, we're done. */
360 file_token
->raw_length
+= length
;
361 file_baton
->curp
[idx
] = eol
;
363 /* If the file length is exactly a multiple of CHUNK_SIZE, we will end up
364 * with a spurious empty token. Avoid returning it.
365 * Note that we use the unnormalized length; we don't want a line containing
366 * only spaces (and no trailing newline) to appear like a non-existent
368 if (file_token
->raw_length
> 0)
371 svn_diff__normalize_buffer(&c
, &length
,
372 &file_baton
->normalize_state
[idx
],
373 curp
, file_baton
->options
);
375 file_token
->norm_offset
= file_token
->offset
+ (c
- curp
);
376 file_token
->length
+= length
;
378 *hash
= svn_diff__adler32(h
, c
, length
);
385 #define COMPARE_CHUNK_SIZE 4096
387 /* Implements svn_diff_fns_t::token_compare */
389 token_compare(void *baton
, void *token1
, void *token2
, int *compare
)
391 svn_diff__file_baton_t
*file_baton
= baton
;
392 svn_diff__file_token_t
*file_token
[2];
393 char buffer
[2][COMPARE_CHUNK_SIZE
];
398 apr_off_t total_length
;
399 /* How much is left to read of each token from the file. */
400 apr_off_t raw_length
[2];
403 svn_diff__normalize_state_t state
[2];
405 file_token
[0] = token1
;
406 file_token
[1] = token2
;
407 if (file_token
[0]->length
< file_token
[1]->length
)
413 if (file_token
[0]->length
> file_token
[1]->length
)
419 total_length
= file_token
[0]->length
;
420 if (total_length
== 0)
426 for (i
= 0; i
< 2; ++i
)
428 idx
[i
] = datasource_to_index(file_token
[i
]->datasource
);
429 offset
[i
] = file_token
[i
]->norm_offset
;
430 chunk
[i
] = file_baton
->chunk
[idx
[i
]];
431 state
[i
] = svn_diff__normalize_state_normal
;
433 if (offset_to_chunk(offset
[i
]) == chunk
[i
])
435 /* If the start of the token is in memory, the entire token is
438 bufp
[i
] = file_baton
->buffer
[idx
[i
]];
439 bufp
[i
] += offset_in_chunk(offset
[i
]);
441 length
[i
] = total_length
;
447 raw_length
[i
] = file_token
[i
]->raw_length
;
454 for (i
= 0; i
< 2; i
++)
458 /* Error if raw_length is 0, that's an unexpected change
459 * of the file that can happen when ignoring whitespace
460 * and that can lead to an infinite loop. */
461 if (raw_length
[i
] == 0)
462 return svn_error_createf(SVN_ERR_DIFF_DATASOURCE_MODIFIED
,
464 _("The file '%s' changed unexpectedly"
466 file_baton
->path
[idx
[i
]]);
468 /* Read a chunk from disk into a buffer */
470 length
[i
] = raw_length
[i
] > COMPARE_CHUNK_SIZE
?
471 COMPARE_CHUNK_SIZE
: raw_length
[i
];
473 SVN_ERR(read_chunk(file_baton
->file
[idx
[i
]],
474 file_baton
->path
[idx
[i
]],
475 bufp
[i
], length
[i
], offset
[i
],
477 offset
[i
] += length
[i
];
478 raw_length
[i
] -= length
[i
];
479 /* bufp[i] gets reset to buffer[i] before reading each chunk,
480 so, overwriting it isn't a problem */
481 svn_diff__normalize_buffer(&bufp
[i
], &length
[i
], &state
[i
],
482 bufp
[i
], file_baton
->options
);
486 len
= length
[0] > length
[1] ? length
[1] : length
[0];
488 /* Compare two chunks (that could be entire tokens if they both reside
491 *compare
= memcmp(bufp
[0], bufp
[1], len
);
501 while(total_length
> 0);
508 /* Implements svn_diff_fns_t::token_discard */
510 token_discard(void *baton
, void *token
)
512 svn_diff__file_baton_t
*file_baton
= baton
;
513 svn_diff__file_token_t
*file_token
= token
;
515 file_token
->next
= file_baton
->tokens
;
516 file_baton
->tokens
= file_token
;
520 /* Implements svn_diff_fns_t::token_discard_all */
522 token_discard_all(void *baton
)
524 svn_diff__file_baton_t
*file_baton
= baton
;
526 /* Discard all memory in use by the tokens, and close all open files. */
527 svn_pool_clear(file_baton
->pool
);
531 static const svn_diff_fns_t svn_diff__file_vtable
=
535 datasource_get_next_token
,
541 /* Id for the --ignore-eol-style option, which doesn't have a short name. */
542 #define SVN_DIFF__OPT_IGNORE_EOL_STYLE 256
544 /* Options supported by svn_diff_file_options_parse(). */
545 static const apr_getopt_option_t diff_options
[] =
547 { "ignore-space-change", 'b', 0, NULL
},
548 { "ignore-all-space", 'w', 0, NULL
},
549 { "ignore-eol-style", SVN_DIFF__OPT_IGNORE_EOL_STYLE
, 0, NULL
},
550 { "show-c-function", 'p', 0, NULL
},
551 /* ### For compatibility; we don't support the argument to -u, because
552 * ### we don't have optional argument support. */
553 { "unified", 'u', 0, NULL
},
557 svn_diff_file_options_t
*
558 svn_diff_file_options_create(apr_pool_t
*pool
)
560 return apr_pcalloc(pool
, sizeof(svn_diff_file_options_t
));
564 svn_diff_file_options_parse(svn_diff_file_options_t
*options
,
565 const apr_array_header_t
*args
,
569 /* Make room for each option (starting at index 1) plus trailing NULL. */
570 const char **argv
= apr_palloc(pool
, sizeof(char*) * (args
->nelts
+ 2));
573 memcpy((void *) (argv
+ 1), args
->elts
, sizeof(char*) * args
->nelts
);
574 argv
[args
->nelts
+ 1] = NULL
;
576 apr_getopt_init(&os
, pool
, args
->nelts
+ 1, argv
);
577 /* No printing of error messages, please! */
583 apr_status_t err
= apr_getopt_long(os
, diff_options
, &opt_id
, &opt_arg
);
585 if (APR_STATUS_IS_EOF(err
))
588 return svn_error_wrap_apr(err
, _("Error parsing diff options"));
593 /* -w takes precedence over -b. */
594 if (! options
->ignore_space
)
595 options
->ignore_space
= svn_diff_file_ignore_space_change
;
598 options
->ignore_space
= svn_diff_file_ignore_space_all
;
600 case SVN_DIFF__OPT_IGNORE_EOL_STYLE
:
601 options
->ignore_eol_style
= TRUE
;
604 options
->show_c_function
= TRUE
;
611 /* Check for spurious arguments. */
612 if (os
->ind
< os
->argc
)
613 return svn_error_createf(SVN_ERR_INVALID_DIFF_OPTION
, NULL
,
614 _("Invalid argument '%s' in diff options"),
621 svn_diff_file_diff_2(svn_diff_t
**diff
,
622 const char *original
,
623 const char *modified
,
624 const svn_diff_file_options_t
*options
,
627 svn_diff__file_baton_t baton
;
629 memset(&baton
, 0, sizeof(baton
));
630 baton
.options
= options
;
631 baton
.path
[0] = original
;
632 baton
.path
[1] = modified
;
633 baton
.pool
= svn_pool_create(pool
);
635 SVN_ERR(svn_diff_diff(diff
, &baton
, &svn_diff__file_vtable
, pool
));
637 svn_pool_destroy(baton
.pool
);
642 svn_diff_file_diff(svn_diff_t
**diff
,
643 const char *original
,
644 const char *modified
,
647 return svn_diff_file_diff_2(diff
, original
, modified
,
648 svn_diff_file_options_create(pool
), pool
);
652 svn_diff_file_diff3_2(svn_diff_t
**diff
,
653 const char *original
,
654 const char *modified
,
656 const svn_diff_file_options_t
*options
,
659 svn_diff__file_baton_t baton
;
661 memset(&baton
, 0, sizeof(baton
));
662 baton
.options
= options
;
663 baton
.path
[0] = original
;
664 baton
.path
[1] = modified
;
665 baton
.path
[2] = latest
;
666 baton
.pool
= svn_pool_create(pool
);
668 SVN_ERR(svn_diff_diff3(diff
, &baton
, &svn_diff__file_vtable
, pool
));
670 svn_pool_destroy(baton
.pool
);
675 svn_diff_file_diff3(svn_diff_t
**diff
,
676 const char *original
,
677 const char *modified
,
681 return svn_diff_file_diff3_2(diff
, original
, modified
, latest
,
682 svn_diff_file_options_create(pool
), pool
);
686 svn_diff_file_diff4_2(svn_diff_t
**diff
,
687 const char *original
,
688 const char *modified
,
690 const char *ancestor
,
691 const svn_diff_file_options_t
*options
,
694 svn_diff__file_baton_t baton
;
696 memset(&baton
, 0, sizeof(baton
));
697 baton
.options
= options
;
698 baton
.path
[0] = original
;
699 baton
.path
[1] = modified
;
700 baton
.path
[2] = latest
;
701 baton
.path
[3] = ancestor
;
702 baton
.pool
= svn_pool_create(pool
);
704 SVN_ERR(svn_diff_diff4(diff
, &baton
, &svn_diff__file_vtable
, pool
));
706 svn_pool_destroy(baton
.pool
);
711 svn_diff_file_diff4(svn_diff_t
**diff
,
712 const char *original
,
713 const char *modified
,
715 const char *ancestor
,
718 return svn_diff_file_diff4_2(diff
, original
, modified
, latest
, ancestor
,
719 svn_diff_file_options_create(pool
), pool
);
722 /** Display unified context diffs **/
724 /* Maximum length of the extra context to show when show_c_function is set.
725 * GNU diff uses 40, let's be brave and use 50 instead. */
726 #define SVN_DIFF__EXTRA_CONTEXT_LENGTH 50
727 typedef struct svn_diff__file_output_baton_t
729 svn_stream_t
*output_stream
;
730 const char *header_encoding
;
732 /* Cached markers, in header_encoding. */
733 const char *context_str
;
734 const char *delete_str
;
735 const char *insert_str
;
740 apr_off_t current_line
[2];
742 char buffer
[2][4096];
743 apr_size_t length
[2];
746 apr_off_t hunk_start
[2];
747 apr_off_t hunk_length
[2];
748 svn_stringbuf_t
*hunk
;
750 /* Should we emit C functions in the unified diff header */
751 svn_boolean_t show_c_function
;
752 /* Extra strings to skip over if we match. */
753 apr_array_header_t
*extra_skip_match
;
754 /* "Context" to append to the @@ line when the show_c_function option
756 svn_stringbuf_t
*extra_context
;
757 /* Extra context for the current hunk. */
758 char hunk_extra_context
[SVN_DIFF__EXTRA_CONTEXT_LENGTH
+ 1];
761 } svn_diff__file_output_baton_t
;
/* How output_unified_line() should treat the line it reads: skip it
 * without adding it to the hunk, or add it to the hunk as a context,
 * deleted or inserted line. */
typedef enum svn_diff__file_output_unified_type_e
{
  svn_diff__file_output_unified_skip,
  svn_diff__file_output_unified_context,
  svn_diff__file_output_unified_delete,
  svn_diff__file_output_unified_insert
} svn_diff__file_output_unified_type_e;
773 output_unified_line(svn_diff__file_output_baton_t
*baton
,
774 svn_diff__file_output_unified_type_e type
, int idx
)
780 svn_boolean_t bytes_processed
= FALSE
;
781 svn_boolean_t had_cr
= FALSE
;
782 /* Are we collecting extra context? */
783 svn_boolean_t collect_extra
= FALSE
;
785 length
= baton
->length
[idx
];
786 curp
= baton
->curp
[idx
];
788 /* Lazily update the current line even if we're at EOF.
789 * This way we fake output of context at EOF
791 baton
->current_line
[idx
]++;
793 if (length
== 0 && apr_file_eof(baton
->file
[idx
]))
802 if (!bytes_processed
)
806 case svn_diff__file_output_unified_context
:
807 svn_stringbuf_appendcstr(baton
->hunk
, baton
->context_str
);
808 baton
->hunk_length
[0]++;
809 baton
->hunk_length
[1]++;
811 case svn_diff__file_output_unified_delete
:
812 svn_stringbuf_appendcstr(baton
->hunk
, baton
->delete_str
);
813 baton
->hunk_length
[0]++;
815 case svn_diff__file_output_unified_insert
:
816 svn_stringbuf_appendcstr(baton
->hunk
, baton
->insert_str
);
817 baton
->hunk_length
[1]++;
823 if (baton
->show_c_function
824 && (type
== svn_diff__file_output_unified_skip
825 || type
== svn_diff__file_output_unified_context
)
826 && (svn_ctype_isalpha(*curp
) || *curp
== '$' || *curp
== '_')
827 && !svn_cstring_match_glob_list(curp
,
828 baton
->extra_skip_match
))
830 svn_stringbuf_setempty(baton
->extra_context
);
831 collect_extra
= TRUE
;
835 eol
= find_eol_start(curp
, length
);
841 had_cr
= (*eol
== '\r');
843 len
= (apr_size_t
)(eol
- curp
);
845 if (! had_cr
|| len
< length
)
847 if (had_cr
&& *eol
== '\n')
855 if (type
!= svn_diff__file_output_unified_skip
)
857 svn_stringbuf_appendbytes(baton
->hunk
, curp
, len
);
861 svn_stringbuf_appendbytes(baton
->extra_context
,
865 baton
->curp
[idx
] = eol
;
866 baton
->length
[idx
] = length
;
874 if (type
!= svn_diff__file_output_unified_skip
)
876 svn_stringbuf_appendbytes(baton
->hunk
, curp
, length
);
881 svn_stringbuf_appendbytes(baton
->extra_context
, curp
, length
);
884 bytes_processed
= TRUE
;
887 curp
= baton
->buffer
[idx
];
888 length
= sizeof(baton
->buffer
[idx
]);
890 err
= svn_io_file_read(baton
->file
[idx
], curp
, &length
, baton
->pool
);
892 /* If the last chunk ended with a CR, we look for an LF at the start
896 if (! err
&& length
> 0 && *curp
== '\n')
898 if (type
!= svn_diff__file_output_unified_skip
)
900 svn_stringbuf_appendbytes(baton
->hunk
, curp
, 1);
902 /* We don't append the LF to extra_context, since it would
903 * just be stripped anyway. */
908 baton
->curp
[idx
] = curp
;
909 baton
->length
[idx
] = length
;
916 if (err
&& ! APR_STATUS_IS_EOF(err
->apr_err
))
919 if (err
&& APR_STATUS_IS_EOF(err
->apr_err
))
921 svn_error_clear(err
);
922 /* Special case if we reach the end of file AND the last line is in the
923 changed range AND the file doesn't end with a newline */
924 if (bytes_processed
&& (type
!= svn_diff__file_output_unified_skip
)
928 SVN_ERR(svn_utf_cstring_from_utf8_ex2
930 /* The string below is intentionally not marked for
931 translation: it's vital to correct operation of
932 the diff(1)/patch(1) program pair. */
933 APR_EOL_STR
"\\ No newline at end of file" APR_EOL_STR
,
934 baton
->header_encoding
, baton
->pool
));
935 svn_stringbuf_appendcstr(baton
->hunk
, out_str
);
938 baton
->length
[idx
] = 0;
945 output_unified_flush_hunk(svn_diff__file_output_baton_t
*baton
)
947 apr_off_t target_line
;
951 if (svn_stringbuf_isempty(baton
->hunk
))
953 /* Nothing to flush */
957 target_line
= baton
->hunk_start
[0] + baton
->hunk_length
[0]
958 + SVN_DIFF__UNIFIED_CONTEXT_SIZE
;
960 /* Add trailing context to the hunk */
961 while (baton
->current_line
[0] < target_line
)
963 SVN_ERR(output_unified_line
964 (baton
, svn_diff__file_output_unified_context
, 0));
967 /* If the file is non-empty, convert the line indexes from
968 zero based to one based */
969 for (i
= 0; i
< 2; i
++)
971 if (baton
->hunk_length
[i
] > 0)
972 baton
->hunk_start
[i
]++;
975 /* Output the hunk header. If the hunk length is 1, the file is a one line
976 file. In this case, suppress the number of lines in the hunk (it is
979 SVN_ERR(svn_stream_printf_from_utf8(baton
->output_stream
,
980 baton
->header_encoding
,
982 "@@ -%" APR_OFF_T_FMT
,
983 baton
->hunk_start
[0]));
984 if (baton
->hunk_length
[0] != 1)
986 SVN_ERR(svn_stream_printf_from_utf8(baton
->output_stream
,
987 baton
->header_encoding
,
988 baton
->pool
, ",%" APR_OFF_T_FMT
,
989 baton
->hunk_length
[0]));
992 SVN_ERR(svn_stream_printf_from_utf8(baton
->output_stream
,
993 baton
->header_encoding
,
994 baton
->pool
, " +%" APR_OFF_T_FMT
,
995 baton
->hunk_start
[1]));
996 if (baton
->hunk_length
[1] != 1)
998 SVN_ERR(svn_stream_printf_from_utf8(baton
->output_stream
,
999 baton
->header_encoding
,
1000 baton
->pool
, ",%" APR_OFF_T_FMT
,
1001 baton
->hunk_length
[1]));
1004 SVN_ERR(svn_stream_printf_from_utf8(baton
->output_stream
,
1005 baton
->header_encoding
,
1006 baton
->pool
, " @@%s%s" APR_EOL_STR
,
1007 baton
->hunk_extra_context
[0]
1009 baton
->hunk_extra_context
));
1011 /* Output the hunk content */
1012 hunk_len
= baton
->hunk
->len
;
1013 SVN_ERR(svn_stream_write(baton
->output_stream
, baton
->hunk
->data
,
1016 /* Prepare for the next hunk */
1017 baton
->hunk_length
[0] = 0;
1018 baton
->hunk_length
[1] = 0;
1019 svn_stringbuf_setempty(baton
->hunk
);
1021 return SVN_NO_ERROR
;
1024 static svn_error_t
*
1025 output_unified_diff_modified(void *baton
,
1026 apr_off_t original_start
, apr_off_t original_length
,
1027 apr_off_t modified_start
, apr_off_t modified_length
,
1028 apr_off_t latest_start
, apr_off_t latest_length
)
1030 svn_diff__file_output_baton_t
*output_baton
= baton
;
1031 apr_off_t target_line
[2];
1034 target_line
[0] = original_start
>= SVN_DIFF__UNIFIED_CONTEXT_SIZE
1035 ? original_start
- SVN_DIFF__UNIFIED_CONTEXT_SIZE
: 0;
1036 target_line
[1] = modified_start
;
1038 /* If the changed ranges are far enough apart (no overlapping or connecting
1039 context), flush the current hunk, initialize the next hunk and skip the
1040 lines not in context. Also do this when this is the first hunk.
1042 if (output_baton
->current_line
[0] < target_line
[0]
1043 && (output_baton
->hunk_start
[0] + output_baton
->hunk_length
[0]
1044 + SVN_DIFF__UNIFIED_CONTEXT_SIZE
< target_line
[0]
1045 || output_baton
->hunk_length
[0] == 0))
1047 SVN_ERR(output_unified_flush_hunk(output_baton
));
1049 output_baton
->hunk_start
[0] = target_line
[0];
1050 output_baton
->hunk_start
[1] = target_line
[1] + target_line
[0]
1053 /* Skip lines until we are at the beginning of the context we want to
1055 while (output_baton
->current_line
[0] < target_line
[0])
1057 SVN_ERR(output_unified_line(output_baton
,
1058 svn_diff__file_output_unified_skip
, 0));
1061 if (output_baton
->show_c_function
)
1065 /* Save the extra context for later use.
1066 * Note that the last byte of the hunk_extra_context array is never
1067 * touched after it is zero-initialized, so the array is always
1069 strncpy(output_baton
->hunk_extra_context
,
1070 output_baton
->extra_context
->data
,
1071 SVN_DIFF__EXTRA_CONTEXT_LENGTH
);
1072 /* Trim whitespace at the end, most notably to get rid of any
1073 * newline characters. */
1074 p
= strlen(output_baton
->hunk_extra_context
);
1076 && svn_ctype_isspace(output_baton
->hunk_extra_context
[p
- 1]))
1078 output_baton
->hunk_extra_context
[--p
] = '\0';
1083 /* Skip lines until we are at the start of the changed range */
1084 while (output_baton
->current_line
[1] < target_line
[1])
1086 SVN_ERR(output_unified_line(output_baton
,
1087 svn_diff__file_output_unified_skip
, 1));
1090 /* Output the context preceding the changed range */
1091 while (output_baton
->current_line
[0] < original_start
)
1093 SVN_ERR(output_unified_line(output_baton
,
1094 svn_diff__file_output_unified_context
, 0));
1097 target_line
[0] = original_start
+ original_length
;
1098 target_line
[1] = modified_start
+ modified_length
;
1100 /* Output the changed range */
1101 for (i
= 0; i
< 2; i
++)
1103 while (output_baton
->current_line
[i
] < target_line
[i
])
1105 SVN_ERR(output_unified_line
1107 i
== 0 ? svn_diff__file_output_unified_delete
1108 : svn_diff__file_output_unified_insert
, i
));
1112 return SVN_NO_ERROR
;
1115 /* Set *HEADER to a new string consisting of PATH, a tab, and PATH's mtime. */
1116 static svn_error_t
*
1117 output_unified_default_hdr(const char **header
, const char *path
,
1120 apr_finfo_t file_info
;
1121 apr_time_exp_t exploded_time
;
1122 char time_buffer
[64];
1123 apr_size_t time_len
;
1125 SVN_ERR(svn_io_stat(&file_info
, path
, APR_FINFO_MTIME
, pool
));
1126 apr_time_exp_lt(&exploded_time
, file_info
.mtime
);
1128 apr_strftime(time_buffer
, &time_len
, sizeof(time_buffer
) - 1,
1129 "%a %b %e %H:%M:%S %Y", &exploded_time
);
1131 *header
= apr_psprintf(pool
, "%s\t%s", path
, time_buffer
);
1133 return SVN_NO_ERROR
;
1136 static const svn_diff_output_fns_t svn_diff__file_output_unified_vtable
=
1138 NULL
, /* output_common */
1139 output_unified_diff_modified
,
1140 NULL
, /* output_diff_latest */
1141 NULL
, /* output_diff_common */
1142 NULL
/* output_conflict */
1146 svn_diff_file_output_unified3(svn_stream_t
*output_stream
,
1148 const char *original_path
,
1149 const char *modified_path
,
1150 const char *original_header
,
1151 const char *modified_header
,
1152 const char *header_encoding
,
1153 const char *relative_to_dir
,
1154 svn_boolean_t show_c_function
,
1157 svn_diff__file_output_baton_t baton
;
1160 if (svn_diff_contains_diffs(diff
))
1164 memset(&baton
, 0, sizeof(baton
));
1165 baton
.output_stream
= output_stream
;
1167 baton
.header_encoding
= header_encoding
;
1168 baton
.path
[0] = original_path
;
1169 baton
.path
[1] = modified_path
;
1170 baton
.hunk
= svn_stringbuf_create("", pool
);
1171 baton
.show_c_function
= show_c_function
;
1172 baton
.extra_context
= svn_stringbuf_create("", pool
);
1173 baton
.extra_skip_match
= apr_array_make(pool
, 3, sizeof(char **));
1175 c
= apr_array_push(baton
.extra_skip_match
);
1177 c
= apr_array_push(baton
.extra_skip_match
);
1179 c
= apr_array_push(baton
.extra_skip_match
);
1182 SVN_ERR(svn_utf_cstring_from_utf8_ex2(&baton
.context_str
, " ",
1183 header_encoding
, pool
));
1184 SVN_ERR(svn_utf_cstring_from_utf8_ex2(&baton
.delete_str
, "-",
1185 header_encoding
, pool
));
1186 SVN_ERR(svn_utf_cstring_from_utf8_ex2(&baton
.insert_str
, "+",
1187 header_encoding
, pool
));
1189 if (relative_to_dir
)
1191 /* Possibly adjust the "original" and "modified" paths shown in
1192 the output (see issue #2723). */
1193 const char *child_path
;
1195 if (! original_header
)
1197 child_path
= svn_path_is_child(relative_to_dir
,
1198 original_path
, pool
);
1200 original_path
= child_path
;
1202 return svn_error_createf(SVN_ERR_BAD_RELATIVE_PATH
, NULL
,
1203 _("Path '%s' must be an immediate child of "
1204 "the directory '%s'"),
1205 original_path
, relative_to_dir
);
1208 if (! modified_header
)
1210 child_path
= svn_path_is_child(relative_to_dir
, modified_path
, pool
);
1212 modified_path
= child_path
;
1214 return svn_error_createf(SVN_ERR_BAD_RELATIVE_PATH
, NULL
,
1215 _("Path '%s' must be an immediate child of "
1216 "the directory '%s'"),
1217 modified_path
, relative_to_dir
);
1221 for (i
= 0; i
< 2; i
++)
1223 SVN_ERR(svn_io_file_open(&baton
.file
[i
], baton
.path
[i
],
1224 APR_READ
, APR_OS_DEFAULT
, pool
));
1227 if (original_header
== NULL
)
1229 SVN_ERR(output_unified_default_hdr
1230 (&original_header
, original_path
, pool
));
1233 if (modified_header
== NULL
)
1235 SVN_ERR(output_unified_default_hdr
1236 (&modified_header
, modified_path
, pool
));
1239 SVN_ERR(svn_stream_printf_from_utf8(output_stream
, header_encoding
, pool
,
1240 "--- %s" APR_EOL_STR
1241 "+++ %s" APR_EOL_STR
,
1242 original_header
, modified_header
));
1244 SVN_ERR(svn_diff_output(diff
, &baton
,
1245 &svn_diff__file_output_unified_vtable
));
1246 SVN_ERR(output_unified_flush_hunk(&baton
));
1248 for (i
= 0; i
< 2; i
++)
1250 SVN_ERR(svn_io_file_close(baton
.file
[i
], pool
));
1254 return SVN_NO_ERROR
;
1258 svn_diff_file_output_unified2(svn_stream_t
*output_stream
,
1260 const char *original_path
,
1261 const char *modified_path
,
1262 const char *original_header
,
1263 const char *modified_header
,
1264 const char *header_encoding
,
1267 return svn_diff_file_output_unified3(output_stream
, diff
,
1268 original_path
, modified_path
,
1269 original_header
, modified_header
,
1270 header_encoding
, NULL
, FALSE
, pool
);
1274 svn_diff_file_output_unified(svn_stream_t
*output_stream
,
1276 const char *original_path
,
1277 const char *modified_path
,
1278 const char *original_header
,
1279 const char *modified_header
,
1282 return svn_diff_file_output_unified2(output_stream
, diff
,
1283 original_path
, modified_path
,
1284 original_header
, modified_header
,
1285 SVN_APR_LOCALE_CHARSET
, pool
);
1289 /** Display diff3 **/
1291 typedef struct svn_diff3__file_output_baton_t
1293 svn_stream_t
*output_stream
;
1295 const char *path
[3];
1297 apr_off_t current_line
[3];
1303 /* The following four members are in the encoding used for the output. */
1304 const char *conflict_modified
;
1305 const char *conflict_original
;
1306 const char *conflict_separator
;
1307 const char *conflict_latest
;
1309 svn_boolean_t display_original_in_conflict
;
1310 svn_boolean_t display_resolved_conflicts
;
1313 } svn_diff3__file_output_baton_t
;
/* How output_line() should treat the line it reads: skip it, or write it
 * to the output stream. */
typedef enum svn_diff3__file_output_type_e
{
  svn_diff3__file_output_skip,
  svn_diff3__file_output_normal
} svn_diff3__file_output_type_e;
1322 static svn_error_t
*
1323 output_line(svn_diff3__file_output_baton_t
*baton
,
1324 svn_diff3__file_output_type_e type
, int idx
)
1331 curp
= baton
->curp
[idx
];
1332 endp
= baton
->endp
[idx
];
1334 /* Lazily update the current line even if we're at EOF.
1336 baton
->current_line
[idx
]++;
1339 return SVN_NO_ERROR
;
1341 eol
= find_eol_start(curp
, endp
- curp
);
1346 svn_boolean_t had_cr
= (*eol
== '\r');
1348 if (had_cr
&& eol
!= endp
&& *eol
== '\n')
1352 if (type
!= svn_diff3__file_output_skip
)
1355 SVN_ERR(svn_stream_write(baton
->output_stream
, curp
, &len
));
1358 baton
->curp
[idx
] = eol
;
1360 return SVN_NO_ERROR
;
1363 static svn_error_t
*
1364 output_hunk(void *baton
, int idx
, apr_off_t target_line
,
1365 apr_off_t target_length
)
1367 svn_diff3__file_output_baton_t
*output_baton
= baton
;
1369 /* Skip lines until we are at the start of the changed range */
1370 while (output_baton
->current_line
[idx
] < target_line
)
1372 SVN_ERR(output_line(output_baton
, svn_diff3__file_output_skip
, idx
));
1375 target_line
+= target_length
;
1377 while (output_baton
->current_line
[idx
] < target_line
)
1379 SVN_ERR(output_line(output_baton
, svn_diff3__file_output_normal
, idx
));
1382 return SVN_NO_ERROR
;
1385 static svn_error_t
*
1386 output_common(void *baton
, apr_off_t original_start
, apr_off_t original_length
,
1387 apr_off_t modified_start
, apr_off_t modified_length
,
1388 apr_off_t latest_start
, apr_off_t latest_length
)
1390 return output_hunk(baton
, 1, modified_start
, modified_length
);
1393 static svn_error_t
*
1394 output_diff_modified(void *baton
,
1395 apr_off_t original_start
, apr_off_t original_length
,
1396 apr_off_t modified_start
, apr_off_t modified_length
,
1397 apr_off_t latest_start
, apr_off_t latest_length
)
1399 return output_hunk(baton
, 1, modified_start
, modified_length
);
1402 static svn_error_t
*
1403 output_diff_latest(void *baton
,
1404 apr_off_t original_start
, apr_off_t original_length
,
1405 apr_off_t modified_start
, apr_off_t modified_length
,
1406 apr_off_t latest_start
, apr_off_t latest_length
)
1408 return output_hunk(baton
, 2, latest_start
, latest_length
);
1411 static svn_error_t
*
1412 output_conflict(void *baton
,
1413 apr_off_t original_start
, apr_off_t original_length
,
1414 apr_off_t modified_start
, apr_off_t modified_length
,
1415 apr_off_t latest_start
, apr_off_t latest_length
,
1418 static const svn_diff_output_fns_t svn_diff3__file_output_vtable
=
1421 output_diff_modified
,
1423 output_diff_modified
, /* output_diff_common */
1427 static svn_error_t
*
1428 output_conflict(void *baton
,
1429 apr_off_t original_start
, apr_off_t original_length
,
1430 apr_off_t modified_start
, apr_off_t modified_length
,
1431 apr_off_t latest_start
, apr_off_t latest_length
,
1434 svn_diff3__file_output_baton_t
*file_baton
= baton
;
1437 if (diff
&& file_baton
->display_resolved_conflicts
)
1439 return svn_diff_output(diff
, baton
,
1440 &svn_diff3__file_output_vtable
);
1443 len
= strlen(file_baton
->conflict_modified
);
1444 SVN_ERR(svn_stream_write(file_baton
->output_stream
,
1445 file_baton
->conflict_modified
,
1448 SVN_ERR(output_hunk(baton
, 1, modified_start
, modified_length
));
1450 if (file_baton
->display_original_in_conflict
)
1452 len
= strlen(file_baton
->conflict_original
);
1453 SVN_ERR(svn_stream_write(file_baton
->output_stream
,
1454 file_baton
->conflict_original
, &len
));
1456 SVN_ERR(output_hunk(baton
, 0, original_start
, original_length
));
1459 len
= strlen(file_baton
->conflict_separator
);
1460 SVN_ERR(svn_stream_write(file_baton
->output_stream
,
1461 file_baton
->conflict_separator
, &len
));
1463 SVN_ERR(output_hunk(baton
, 2, latest_start
, latest_length
));
1465 len
= strlen(file_baton
->conflict_latest
);
1466 SVN_ERR(svn_stream_write(file_baton
->output_stream
,
1467 file_baton
->conflict_latest
, &len
));
1469 return SVN_NO_ERROR
;
/* Return the first eol marker found in [BUF, ENDP) as a
 * NUL-terminated string, or NULL if no eol marker is found.
 *
 * If the last valid character of BUF is the first byte of a
 * potentially two-byte eol sequence, just return "\r", that is,
 * assume BUF represents a CR-only file.  This is correct for callers
 * that pass an entire file at once, and is no more likely to be
 * incorrect than correct for any caller that doesn't.
 */
1482 detect_eol(char *buf
, char *endp
)
1484 const char *eol
= find_eol_start(buf
, endp
- buf
);
1490 /* We found a CR. */
1492 if (eol
== endp
|| *eol
!= '\n')
1501 svn_diff_file_output_merge(svn_stream_t
*output_stream
,
1503 const char *original_path
,
1504 const char *modified_path
,
1505 const char *latest_path
,
1506 const char *conflict_original
,
1507 const char *conflict_modified
,
1508 const char *conflict_latest
,
1509 const char *conflict_separator
,
1510 svn_boolean_t display_original_in_conflict
,
1511 svn_boolean_t display_resolved_conflicts
,
1514 svn_diff3__file_output_baton_t baton
;
1515 apr_file_t
*file
[3];
1519 apr_mmap_t
*mm
[3] = { 0 };
1520 #endif /* APR_HAS_MMAP */
1523 memset(&baton
, 0, sizeof(baton
));
1524 baton
.output_stream
= output_stream
;
1526 baton
.path
[0] = original_path
;
1527 baton
.path
[1] = modified_path
;
1528 baton
.path
[2] = latest_path
;
1529 SVN_ERR(svn_utf_cstring_from_utf8(&baton
.conflict_modified
,
1530 conflict_modified
? conflict_modified
1531 : apr_psprintf(pool
, "<<<<<<< %s",
1534 SVN_ERR(svn_utf_cstring_from_utf8(&baton
.conflict_original
,
1535 conflict_original
? conflict_original
1536 : apr_psprintf(pool
, "||||||| %s",
1539 SVN_ERR(svn_utf_cstring_from_utf8(&baton
.conflict_separator
,
1540 conflict_separator
? conflict_separator
1541 : "=======", pool
));
1542 SVN_ERR(svn_utf_cstring_from_utf8(&baton
.conflict_latest
,
1543 conflict_latest
? conflict_latest
1544 : apr_psprintf(pool
, ">>>>>>> %s",
1548 baton
.display_original_in_conflict
= display_original_in_conflict
;
1549 baton
.display_resolved_conflicts
= display_resolved_conflicts
&&
1550 !display_original_in_conflict
;
1552 for (idx
= 0; idx
< 3; idx
++)
1554 SVN_ERR(map_or_read_file(&file
[idx
],
1556 &baton
.buffer
[idx
], &size
,
1557 baton
.path
[idx
], pool
));
1559 baton
.curp
[idx
] = baton
.buffer
[idx
];
1560 baton
.endp
[idx
] = baton
.buffer
[idx
];
1562 if (baton
.endp
[idx
])
1563 baton
.endp
[idx
] += size
;
  /* Check what eol marker we should use for conflict markers.
     We use the eol marker of the modified file and fall back on the
     platform's eol marker if that file doesn't contain any newlines. */
1569 eol
= detect_eol(baton
.buffer
[1], baton
.endp
[1]);
1573 /* Extend our conflict markers with the correct eol marker. */
1574 baton
.conflict_modified
= apr_pstrcat(pool
, baton
.conflict_modified
, eol
,
1576 baton
.conflict_original
= apr_pstrcat(pool
, baton
.conflict_original
, eol
,
1578 baton
.conflict_separator
= apr_pstrcat(pool
, baton
.conflict_separator
, eol
,
1580 baton
.conflict_latest
= apr_pstrcat(pool
, baton
.conflict_latest
, eol
,
1583 SVN_ERR(svn_diff_output(diff
, &baton
,
1584 &svn_diff3__file_output_vtable
));
1586 for (idx
= 0; idx
< 3; idx
++)
1591 apr_status_t rv
= apr_mmap_delete(mm
[idx
]);
1592 if (rv
!= APR_SUCCESS
)
1594 return svn_error_wrap_apr(rv
, _("Failed to delete mmap '%s'"),
1598 #endif /* APR_HAS_MMAP */
1602 SVN_ERR(svn_io_file_close(file
[idx
], pool
));
1606 return SVN_NO_ERROR
;