Followup to r29625: fix getopt tests.
[svn.git] / subversion / libsvn_diff / diff_file.c
blob35d1739b66c2a2163a61fa2617139356d1af926e
1 /*
2 * diff_file.c : routines for doing diffs on files
4 * ====================================================================
5 * Copyright (c) 2000-2006 CollabNet. All rights reserved.
7 * This software is licensed as described in the file COPYING, which
8 * you should have received as part of this distribution. The terms
9 * are also available at http://subversion.tigris.org/license-1.html.
10 * If newer versions of this license are posted there, you may use a
11 * newer version instead, at your option.
13 * This software consists of voluntary contributions made by many
14 * individuals. For exact contribution history, see the revision
15 * history and logs, available at http://subversion.tigris.org/.
16 * ====================================================================
20 #include <apr.h>
21 #include <apr_pools.h>
22 #include <apr_general.h>
23 #include <apr_file_io.h>
24 #include <apr_file_info.h>
25 #include <apr_time.h>
26 #include <apr_mmap.h>
27 #include <apr_getopt.h>
29 #include "svn_error.h"
30 #include "svn_diff.h"
31 #include "svn_types.h"
32 #include "svn_string.h"
33 #include "svn_io.h"
34 #include "svn_utf.h"
35 #include "svn_pools.h"
36 #include "diff.h"
37 #include "svn_private_config.h"
38 #include "svn_path.h"
39 #include "svn_ctype.h"
42 /* A token, i.e. a line read from a file. */
43 typedef struct svn_diff__file_token_t
45 /* Next token in free list. */
46 struct svn_diff__file_token_t *next;
47 svn_diff_datasource_e datasource;
48 /* Offset in the datasource. */
49 apr_off_t offset;
50 /* Offset of the normalized token (may skip leading whitespace) */
51 apr_off_t norm_offset;
52 /* Total length - before normalization. */
53 apr_off_t raw_length;
54 /* Total length - after normalization. */
55 apr_off_t length;
56 } svn_diff__file_token_t;
59 typedef struct svn_diff__file_baton_t
61 const svn_diff_file_options_t *options;
62 const char *path[4];
64 apr_file_t *file[4];
65 apr_off_t size[4];
67 int chunk[4];
68 char *buffer[4];
69 char *curp[4];
70 char *endp[4];
72 /* List of free tokens that may be reused. */
73 svn_diff__file_token_t *tokens;
75 svn_diff__normalize_state_t normalize_state[4];
77 apr_pool_t *pool;
78 } svn_diff__file_baton_t;
81 /* Look for the start of an end-of-line sequence (i.e. CR or LF)
82 * in the array pointed to by BUF, of length LEN.
83 * If such a byte is found, return the pointer to it, else return NULL.
85 static char *
86 find_eol_start(char *buf, apr_size_t len)
88 for (; len > 0; ++buf, --len)
90 if (*buf == '\n' || *buf == '\r')
91 return buf;
93 return NULL;
96 static int
97 datasource_to_index(svn_diff_datasource_e datasource)
99 switch (datasource)
101 case svn_diff_datasource_original:
102 return 0;
104 case svn_diff_datasource_modified:
105 return 1;
107 case svn_diff_datasource_latest:
108 return 2;
110 case svn_diff_datasource_ancestor:
111 return 3;
114 return -1;
/* Files are read in chunks of 128k (1 << 17 bytes).  The number is
 * arbitrary; if someone comes up with a value that has some
 * argumentation behind it, let's use that.
 */
#define CHUNK_SHIFT 17
#define CHUNK_SIZE (1 << CHUNK_SHIFT)

/* Convert between chunk numbers and byte offsets.
 *
 * Chunk numbers are kept in plain ints, so chunk_to_offset() must widen
 * its argument to apr_off_t before shifting: shifting an int left by 17
 * overflows as soon as the chunk number reaches 16384, i.e. for any
 * file of 2 GiB or more.
 */
#define chunk_to_offset(chunk) (((apr_off_t) (chunk)) << CHUNK_SHIFT)
#define offset_to_chunk(offset) ((offset) >> CHUNK_SHIFT)
#define offset_in_chunk(offset) ((offset) & (CHUNK_SIZE - 1))
129 /* Read a chunk from a FILE into BUFFER, starting from OFFSET, going for
130 * *LENGTH. The actual bytes read are stored in *LENGTH on return.
132 static APR_INLINE svn_error_t *
133 read_chunk(apr_file_t *file, const char *path,
134 char *buffer, apr_size_t length,
135 apr_off_t offset, apr_pool_t *pool)
137 /* XXX: The final offset may not be the one we asked for.
138 * XXX: Check.
140 SVN_ERR(svn_io_file_seek(file, APR_SET, &offset, pool));
141 SVN_ERR(svn_io_file_read_full(file, buffer, length, NULL, pool));
143 return SVN_NO_ERROR;
147 /* Map or read a file at PATH. *BUFFER will point to the file
148 * contents; if the file was mapped, *FILE and *MM will contain the
149 * mmap context; otherwise they will be NULL. SIZE will contain the
150 * file size. Allocate from POOL.
152 #if APR_HAS_MMAP
153 #define MMAP_T_PARAM(NAME) apr_mmap_t **NAME,
154 #define MMAP_T_ARG(NAME) &(NAME),
155 #else
156 #define MMAP_T_PARAM(NAME)
157 #define MMAP_T_ARG(NAME)
158 #endif
160 static svn_error_t *
161 map_or_read_file(apr_file_t **file,
162 MMAP_T_PARAM(mm)
163 char **buffer, apr_off_t *size,
164 const char *path, apr_pool_t *pool)
166 apr_finfo_t finfo;
167 apr_status_t rv;
169 *buffer = NULL;
171 SVN_ERR(svn_io_file_open(file, path, APR_READ, APR_OS_DEFAULT, pool));
172 SVN_ERR(svn_io_file_info_get(&finfo, APR_FINFO_SIZE, *file, pool));
174 #if APR_HAS_MMAP
175 if (finfo.size > APR_MMAP_THRESHOLD)
177 rv = apr_mmap_create(mm, *file, 0, finfo.size, APR_MMAP_READ, pool);
178 if (rv == APR_SUCCESS)
180 *buffer = (*mm)->mm;
183 /* On failure we just fall through and try reading the file into
184 * memory instead.
187 #endif /* APR_HAS_MMAP */
189 if (*buffer == NULL && finfo.size > 0)
191 *buffer = apr_palloc(pool, finfo.size);
193 SVN_ERR(svn_io_file_read_full(*file, *buffer, finfo.size, NULL, pool));
195 /* Since we have the entire contents of the file we can
196 * close it now.
198 SVN_ERR(svn_io_file_close(*file, pool));
200 *file = NULL;
203 *size = finfo.size;
205 return SVN_NO_ERROR;
209 /* Implements svn_diff_fns_t::datasource_open */
210 static svn_error_t *
211 datasource_open(void *baton, svn_diff_datasource_e datasource)
213 svn_diff__file_baton_t *file_baton = baton;
214 int idx;
215 apr_finfo_t finfo;
216 apr_size_t length;
217 char *curp;
218 char *endp;
220 idx = datasource_to_index(datasource);
222 SVN_ERR(svn_io_file_open(&file_baton->file[idx], file_baton->path[idx],
223 APR_READ, APR_OS_DEFAULT, file_baton->pool));
225 SVN_ERR(svn_io_file_info_get(&finfo, APR_FINFO_SIZE,
226 file_baton->file[idx], file_baton->pool));
228 file_baton->size[idx] = finfo.size;
229 length = finfo.size > CHUNK_SIZE ? CHUNK_SIZE : finfo.size;
231 if (length == 0)
232 return SVN_NO_ERROR;
234 endp = curp = apr_palloc(file_baton->pool, length);
235 endp += length;
237 file_baton->buffer[idx] = file_baton->curp[idx] = curp;
238 file_baton->endp[idx] = endp;
240 SVN_ERR(read_chunk(file_baton->file[idx], file_baton->path[idx],
241 curp, length, 0, file_baton->pool));
243 return SVN_NO_ERROR;
247 /* Implements svn_diff_fns_t::datasource_close */
248 static svn_error_t *
249 datasource_close(void *baton, svn_diff_datasource_e datasource)
251 /* Do nothing. The compare_token function needs previous datasources
252 * to stay available until all datasources are processed.
255 return SVN_NO_ERROR;
258 /* Implements svn_diff_fns_t::datasource_get_next_token */
259 static svn_error_t *
260 datasource_get_next_token(apr_uint32_t *hash, void **token, void *baton,
261 svn_diff_datasource_e datasource)
263 svn_diff__file_baton_t *file_baton = baton;
264 svn_diff__file_token_t *file_token;
265 int idx;
266 char *endp;
267 char *curp;
268 char *eol;
269 int last_chunk;
270 apr_off_t length;
271 apr_uint32_t h = 0;
272 /* Did the last chunk end in a CR character? */
273 svn_boolean_t had_cr = FALSE;
275 *token = NULL;
277 idx = datasource_to_index(datasource);
279 curp = file_baton->curp[idx];
280 endp = file_baton->endp[idx];
282 last_chunk = offset_to_chunk(file_baton->size[idx]);
284 if (curp == endp
285 && last_chunk == file_baton->chunk[idx])
287 return SVN_NO_ERROR;
290 /* Get a new token */
291 file_token = file_baton->tokens;
292 if (file_token)
294 file_baton->tokens = file_token->next;
296 else
298 file_token = apr_palloc(file_baton->pool, sizeof(*file_token));
301 file_token->datasource = datasource;
302 file_token->offset = chunk_to_offset(file_baton->chunk[idx])
303 + (curp - file_baton->buffer[idx]);
304 file_token->raw_length = 0;
305 file_token->length = 0;
307 while (1)
309 eol = find_eol_start(curp, endp - curp);
310 if (eol)
312 had_cr = (*eol == '\r');
313 eol++;
314 /* If we have the whole eol sequence in the chunk... */
315 if (!had_cr || eol != endp)
317 if (had_cr && *eol == '\n')
318 ++eol;
319 break;
323 if (file_baton->chunk[idx] == last_chunk)
325 eol = endp;
326 break;
329 length = endp - curp;
330 file_token->raw_length += length;
331 svn_diff__normalize_buffer(&curp, &length,
332 &file_baton->normalize_state[idx],
333 curp, file_baton->options);
334 file_token->length += length;
335 h = svn_diff__adler32(h, curp, length);
337 curp = endp = file_baton->buffer[idx];
338 file_baton->chunk[idx]++;
339 length = file_baton->chunk[idx] == last_chunk ?
340 offset_in_chunk(file_baton->size[idx]) : CHUNK_SIZE;
341 endp += length;
342 file_baton->endp[idx] = endp;
344 SVN_ERR(read_chunk(file_baton->file[idx], file_baton->path[idx],
345 curp, length,
346 chunk_to_offset(file_baton->chunk[idx]),
347 file_baton->pool));
349 /* If the last chunk ended in a CR, we're done. */
350 if (had_cr)
352 eol = curp;
353 if (*curp == '\n')
354 ++eol;
355 break;
359 length = eol - curp;
360 file_token->raw_length += length;
361 file_baton->curp[idx] = eol;
363 /* If the file length is exactly a multiple of CHUNK_SIZE, we will end up
364 * with a spurious empty token. Avoid returning it.
365 * Note that we use the unnormalized length; we don't want a line containing
366 * only spaces (and no trailing newline) to appear like a non-existent
367 * line. */
368 if (file_token->raw_length > 0)
370 char *c = curp;
371 svn_diff__normalize_buffer(&c, &length,
372 &file_baton->normalize_state[idx],
373 curp, file_baton->options);
375 file_token->norm_offset = file_token->offset + (c - curp);
376 file_token->length += length;
378 *hash = svn_diff__adler32(h, c, length);
379 *token = file_token;
382 return SVN_NO_ERROR;
385 #define COMPARE_CHUNK_SIZE 4096
387 /* Implements svn_diff_fns_t::token_compare */
388 static svn_error_t *
389 token_compare(void *baton, void *token1, void *token2, int *compare)
391 svn_diff__file_baton_t *file_baton = baton;
392 svn_diff__file_token_t *file_token[2];
393 char buffer[2][COMPARE_CHUNK_SIZE];
394 char *bufp[2];
395 apr_off_t offset[2];
396 int idx[2];
397 apr_off_t length[2];
398 apr_off_t total_length;
399 /* How much is left to read of each token from the file. */
400 apr_off_t raw_length[2];
401 int i;
402 int chunk[2];
403 svn_diff__normalize_state_t state[2];
405 file_token[0] = token1;
406 file_token[1] = token2;
407 if (file_token[0]->length < file_token[1]->length)
409 *compare = -1;
410 return SVN_NO_ERROR;
413 if (file_token[0]->length > file_token[1]->length)
415 *compare = 1;
416 return SVN_NO_ERROR;
419 total_length = file_token[0]->length;
420 if (total_length == 0)
422 *compare = 0;
423 return SVN_NO_ERROR;
426 for (i = 0; i < 2; ++i)
428 idx[i] = datasource_to_index(file_token[i]->datasource);
429 offset[i] = file_token[i]->norm_offset;
430 chunk[i] = file_baton->chunk[idx[i]];
431 state[i] = svn_diff__normalize_state_normal;
433 if (offset_to_chunk(offset[i]) == chunk[i])
435 /* If the start of the token is in memory, the entire token is
436 * in memory.
438 bufp[i] = file_baton->buffer[idx[i]];
439 bufp[i] += offset_in_chunk(offset[i]);
441 length[i] = total_length;
442 raw_length[i] = 0;
444 else
446 length[i] = 0;
447 raw_length[i] = file_token[i]->raw_length;
453 apr_off_t len;
454 for (i = 0; i < 2; i++)
456 if (length[i] == 0)
458 /* Error if raw_length is 0, that's an unexpected change
459 * of the file that can happen when ingoring whitespace
460 * and that can lead to an infinite loop. */
461 if (raw_length[i] == 0)
462 return svn_error_createf(SVN_ERR_DIFF_DATASOURCE_MODIFIED,
463 NULL,
464 _("The file '%s' changed unexpectedly"
465 " during diff"),
466 file_baton->path[idx[i]]);
468 /* Read a chunk from disk into a buffer */
469 bufp[i] = buffer[i];
470 length[i] = raw_length[i] > COMPARE_CHUNK_SIZE ?
471 COMPARE_CHUNK_SIZE : raw_length[i];
473 SVN_ERR(read_chunk(file_baton->file[idx[i]],
474 file_baton->path[idx[i]],
475 bufp[i], length[i], offset[i],
476 file_baton->pool));
477 offset[i] += length[i];
478 raw_length[i] -= length[i];
479 /* bufp[i] gets reset to buffer[i] before reading each chunk,
480 so, overwriting it isn't a problem */
481 svn_diff__normalize_buffer(&bufp[i], &length[i], &state[i],
482 bufp[i], file_baton->options);
486 len = length[0] > length[1] ? length[1] : length[0];
488 /* Compare two chunks (that could be entire tokens if they both reside
489 * in memory).
491 *compare = memcmp(bufp[0], bufp[1], len);
492 if (*compare != 0)
493 return SVN_NO_ERROR;
495 total_length -= len;
496 length[0] -= len;
497 length[1] -= len;
498 bufp[0] += len;
499 bufp[1] += len;
501 while(total_length > 0);
503 *compare = 0;
504 return SVN_NO_ERROR;
508 /* Implements svn_diff_fns_t::token_discard */
509 static void
510 token_discard(void *baton, void *token)
512 svn_diff__file_baton_t *file_baton = baton;
513 svn_diff__file_token_t *file_token = token;
515 file_token->next = file_baton->tokens;
516 file_baton->tokens = file_token;
520 /* Implements svn_diff_fns_t::token_discard_all */
521 static void
522 token_discard_all(void *baton)
524 svn_diff__file_baton_t *file_baton = baton;
526 /* Discard all memory in use by the tokens, and close all open files. */
527 svn_pool_clear(file_baton->pool);
531 static const svn_diff_fns_t svn_diff__file_vtable =
533 datasource_open,
534 datasource_close,
535 datasource_get_next_token,
536 token_compare,
537 token_discard,
538 token_discard_all
541 /* Id for the --ignore-eol-style option, which doesn't have a short name. */
542 #define SVN_DIFF__OPT_IGNORE_EOL_STYLE 256
544 /* Options supported by svn_diff_file_options_parse(). */
545 static const apr_getopt_option_t diff_options[] =
547 { "ignore-space-change", 'b', 0, NULL },
548 { "ignore-all-space", 'w', 0, NULL },
549 { "ignore-eol-style", SVN_DIFF__OPT_IGNORE_EOL_STYLE, 0, NULL },
550 { "show-c-function", 'p', 0, NULL },
551 /* ### For compatibility; we don't support the argument to -u, because
552 * ### we don't have optional argument support. */
553 { "unified", 'u', 0, NULL },
554 { NULL, 0, 0, NULL }
557 svn_diff_file_options_t *
558 svn_diff_file_options_create(apr_pool_t *pool)
560 return apr_pcalloc(pool, sizeof(svn_diff_file_options_t));
563 svn_error_t *
564 svn_diff_file_options_parse(svn_diff_file_options_t *options,
565 const apr_array_header_t *args,
566 apr_pool_t *pool)
568 apr_getopt_t *os;
569 /* Make room for each option (starting at index 1) plus trailing NULL. */
570 const char **argv = apr_palloc(pool, sizeof(char*) * (args->nelts + 2));
572 argv[0] = "";
573 memcpy((void *) (argv + 1), args->elts, sizeof(char*) * args->nelts);
574 argv[args->nelts + 1] = NULL;
576 apr_getopt_init(&os, pool, args->nelts + 1, argv);
577 /* No printing of error messages, please! */
578 os->errfn = NULL;
579 while (1)
581 const char *opt_arg;
582 int opt_id;
583 apr_status_t err = apr_getopt_long(os, diff_options, &opt_id, &opt_arg);
585 if (APR_STATUS_IS_EOF(err))
586 break;
587 if (err)
588 return svn_error_wrap_apr(err, _("Error parsing diff options"));
590 switch (opt_id)
592 case 'b':
593 /* -w takes precedence over -b. */
594 if (! options->ignore_space)
595 options->ignore_space = svn_diff_file_ignore_space_change;
596 break;
597 case 'w':
598 options->ignore_space = svn_diff_file_ignore_space_all;
599 break;
600 case SVN_DIFF__OPT_IGNORE_EOL_STYLE:
601 options->ignore_eol_style = TRUE;
602 break;
603 case 'p':
604 options->show_c_function = TRUE;
605 break;
606 default:
607 break;
611 /* Check for spurious arguments. */
612 if (os->ind < os->argc)
613 return svn_error_createf(SVN_ERR_INVALID_DIFF_OPTION, NULL,
614 _("Invalid argument '%s' in diff options"),
615 os->argv[os->ind]);
617 return SVN_NO_ERROR;
620 svn_error_t *
621 svn_diff_file_diff_2(svn_diff_t **diff,
622 const char *original,
623 const char *modified,
624 const svn_diff_file_options_t *options,
625 apr_pool_t *pool)
627 svn_diff__file_baton_t baton;
629 memset(&baton, 0, sizeof(baton));
630 baton.options = options;
631 baton.path[0] = original;
632 baton.path[1] = modified;
633 baton.pool = svn_pool_create(pool);
635 SVN_ERR(svn_diff_diff(diff, &baton, &svn_diff__file_vtable, pool));
637 svn_pool_destroy(baton.pool);
638 return SVN_NO_ERROR;
641 svn_error_t *
642 svn_diff_file_diff(svn_diff_t **diff,
643 const char *original,
644 const char *modified,
645 apr_pool_t *pool)
647 return svn_diff_file_diff_2(diff, original, modified,
648 svn_diff_file_options_create(pool), pool);
651 svn_error_t *
652 svn_diff_file_diff3_2(svn_diff_t **diff,
653 const char *original,
654 const char *modified,
655 const char *latest,
656 const svn_diff_file_options_t *options,
657 apr_pool_t *pool)
659 svn_diff__file_baton_t baton;
661 memset(&baton, 0, sizeof(baton));
662 baton.options = options;
663 baton.path[0] = original;
664 baton.path[1] = modified;
665 baton.path[2] = latest;
666 baton.pool = svn_pool_create(pool);
668 SVN_ERR(svn_diff_diff3(diff, &baton, &svn_diff__file_vtable, pool));
670 svn_pool_destroy(baton.pool);
671 return SVN_NO_ERROR;
674 svn_error_t *
675 svn_diff_file_diff3(svn_diff_t **diff,
676 const char *original,
677 const char *modified,
678 const char *latest,
679 apr_pool_t *pool)
681 return svn_diff_file_diff3_2(diff, original, modified, latest,
682 svn_diff_file_options_create(pool), pool);
685 svn_error_t *
686 svn_diff_file_diff4_2(svn_diff_t **diff,
687 const char *original,
688 const char *modified,
689 const char *latest,
690 const char *ancestor,
691 const svn_diff_file_options_t *options,
692 apr_pool_t *pool)
694 svn_diff__file_baton_t baton;
696 memset(&baton, 0, sizeof(baton));
697 baton.options = options;
698 baton.path[0] = original;
699 baton.path[1] = modified;
700 baton.path[2] = latest;
701 baton.path[3] = ancestor;
702 baton.pool = svn_pool_create(pool);
704 SVN_ERR(svn_diff_diff4(diff, &baton, &svn_diff__file_vtable, pool));
706 svn_pool_destroy(baton.pool);
707 return SVN_NO_ERROR;
710 svn_error_t *
711 svn_diff_file_diff4(svn_diff_t **diff,
712 const char *original,
713 const char *modified,
714 const char *latest,
715 const char *ancestor,
716 apr_pool_t *pool)
718 return svn_diff_file_diff4_2(diff, original, modified, latest, ancestor,
719 svn_diff_file_options_create(pool), pool);
722 /** Display unified context diffs **/
724 /* Maximum length of the extra context to show when show_c_function is set.
725 * GNU diff uses 40, let's be brave and use 50 instead. */
726 #define SVN_DIFF__EXTRA_CONTEXT_LENGTH 50
727 typedef struct svn_diff__file_output_baton_t
729 svn_stream_t *output_stream;
730 const char *header_encoding;
732 /* Cached markers, in header_encoding. */
733 const char *context_str;
734 const char *delete_str;
735 const char *insert_str;
737 const char *path[2];
738 apr_file_t *file[2];
740 apr_off_t current_line[2];
742 char buffer[2][4096];
743 apr_size_t length[2];
744 char *curp[2];
746 apr_off_t hunk_start[2];
747 apr_off_t hunk_length[2];
748 svn_stringbuf_t *hunk;
750 /* Should we emit C functions in the unified diff header */
751 svn_boolean_t show_c_function;
752 /* Extra strings to skip over if we match. */
753 apr_array_header_t *extra_skip_match;
754 /* "Context" to append to the @@ line when the show_c_function option
755 * is set. */
756 svn_stringbuf_t *extra_context;
757 /* Extra context for the current hunk. */
758 char hunk_extra_context[SVN_DIFF__EXTRA_CONTEXT_LENGTH + 1];
760 apr_pool_t *pool;
761 } svn_diff__file_output_baton_t;
/* What output_unified_line() should do with the line it consumes. */
typedef enum svn_diff__file_output_unified_type_e
{
  /* Consume the line without adding it to the hunk. */
  svn_diff__file_output_unified_skip,
  /* Add it with the context marker; counts for both files. */
  svn_diff__file_output_unified_context,
  /* Add it with the delete marker; counts for the original file. */
  svn_diff__file_output_unified_delete,
  /* Add it with the insert marker; counts for the modified file. */
  svn_diff__file_output_unified_insert
} svn_diff__file_output_unified_type_e;
772 static svn_error_t *
773 output_unified_line(svn_diff__file_output_baton_t *baton,
774 svn_diff__file_output_unified_type_e type, int idx)
776 char *curp;
777 char *eol;
778 apr_size_t length;
779 svn_error_t *err;
780 svn_boolean_t bytes_processed = FALSE;
781 svn_boolean_t had_cr = FALSE;
782 /* Are we collecting extra context? */
783 svn_boolean_t collect_extra = FALSE;
785 length = baton->length[idx];
786 curp = baton->curp[idx];
788 /* Lazily update the current line even if we're at EOF.
789 * This way we fake output of context at EOF
791 baton->current_line[idx]++;
793 if (length == 0 && apr_file_eof(baton->file[idx]))
795 return SVN_NO_ERROR;
800 if (length > 0)
802 if (!bytes_processed)
804 switch (type)
806 case svn_diff__file_output_unified_context:
807 svn_stringbuf_appendcstr(baton->hunk, baton->context_str);
808 baton->hunk_length[0]++;
809 baton->hunk_length[1]++;
810 break;
811 case svn_diff__file_output_unified_delete:
812 svn_stringbuf_appendcstr(baton->hunk, baton->delete_str);
813 baton->hunk_length[0]++;
814 break;
815 case svn_diff__file_output_unified_insert:
816 svn_stringbuf_appendcstr(baton->hunk, baton->insert_str);
817 baton->hunk_length[1]++;
818 break;
819 default:
820 break;
823 if (baton->show_c_function
824 && (type == svn_diff__file_output_unified_skip
825 || type == svn_diff__file_output_unified_context)
826 && (svn_ctype_isalpha(*curp) || *curp == '$' || *curp == '_')
827 && !svn_cstring_match_glob_list(curp,
828 baton->extra_skip_match))
830 svn_stringbuf_setempty(baton->extra_context);
831 collect_extra = TRUE;
835 eol = find_eol_start(curp, length);
837 if (eol != NULL)
839 apr_size_t len;
841 had_cr = (*eol == '\r');
842 eol++;
843 len = (apr_size_t)(eol - curp);
845 if (! had_cr || len < length)
847 if (had_cr && *eol == '\n')
849 ++eol;
850 ++len;
853 length -= len;
855 if (type != svn_diff__file_output_unified_skip)
857 svn_stringbuf_appendbytes(baton->hunk, curp, len);
859 if (collect_extra)
861 svn_stringbuf_appendbytes(baton->extra_context,
862 curp, len);
865 baton->curp[idx] = eol;
866 baton->length[idx] = length;
868 err = SVN_NO_ERROR;
870 break;
874 if (type != svn_diff__file_output_unified_skip)
876 svn_stringbuf_appendbytes(baton->hunk, curp, length);
879 if (collect_extra)
881 svn_stringbuf_appendbytes(baton->extra_context, curp, length);
884 bytes_processed = TRUE;
887 curp = baton->buffer[idx];
888 length = sizeof(baton->buffer[idx]);
890 err = svn_io_file_read(baton->file[idx], curp, &length, baton->pool);
892 /* If the last chunk ended with a CR, we look for an LF at the start
893 of this chunk. */
894 if (had_cr)
896 if (! err && length > 0 && *curp == '\n')
898 if (type != svn_diff__file_output_unified_skip)
900 svn_stringbuf_appendbytes(baton->hunk, curp, 1);
902 /* We don't append the LF to extra_context, since it would
903 * just be stripped anyway. */
904 ++curp;
905 --length;
908 baton->curp[idx] = curp;
909 baton->length[idx] = length;
911 break;
914 while (! err);
916 if (err && ! APR_STATUS_IS_EOF(err->apr_err))
917 return err;
919 if (err && APR_STATUS_IS_EOF(err->apr_err))
921 svn_error_clear(err);
922 /* Special case if we reach the end of file AND the last line is in the
923 changed range AND the file doesn't end with a newline */
924 if (bytes_processed && (type != svn_diff__file_output_unified_skip)
925 && ! had_cr)
927 const char *out_str;
928 SVN_ERR(svn_utf_cstring_from_utf8_ex2
929 (&out_str,
930 /* The string below is intentionally not marked for
931 translation: it's vital to correct operation of
932 the diff(1)/patch(1) program pair. */
933 APR_EOL_STR "\\ No newline at end of file" APR_EOL_STR,
934 baton->header_encoding, baton->pool));
935 svn_stringbuf_appendcstr(baton->hunk, out_str);
938 baton->length[idx] = 0;
941 return SVN_NO_ERROR;
944 static svn_error_t *
945 output_unified_flush_hunk(svn_diff__file_output_baton_t *baton)
947 apr_off_t target_line;
948 apr_size_t hunk_len;
949 int i;
951 if (svn_stringbuf_isempty(baton->hunk))
953 /* Nothing to flush */
954 return SVN_NO_ERROR;
957 target_line = baton->hunk_start[0] + baton->hunk_length[0]
958 + SVN_DIFF__UNIFIED_CONTEXT_SIZE;
960 /* Add trailing context to the hunk */
961 while (baton->current_line[0] < target_line)
963 SVN_ERR(output_unified_line
964 (baton, svn_diff__file_output_unified_context, 0));
967 /* If the file is non-empty, convert the line indexes from
968 zero based to one based */
969 for (i = 0; i < 2; i++)
971 if (baton->hunk_length[i] > 0)
972 baton->hunk_start[i]++;
975 /* Output the hunk header. If the hunk length is 1, the file is a one line
976 file. In this case, surpress the number of lines in the hunk (it is
977 1 implicitly)
979 SVN_ERR(svn_stream_printf_from_utf8(baton->output_stream,
980 baton->header_encoding,
981 baton->pool,
982 "@@ -%" APR_OFF_T_FMT,
983 baton->hunk_start[0]));
984 if (baton->hunk_length[0] != 1)
986 SVN_ERR(svn_stream_printf_from_utf8(baton->output_stream,
987 baton->header_encoding,
988 baton->pool, ",%" APR_OFF_T_FMT,
989 baton->hunk_length[0]));
992 SVN_ERR(svn_stream_printf_from_utf8(baton->output_stream,
993 baton->header_encoding,
994 baton->pool, " +%" APR_OFF_T_FMT,
995 baton->hunk_start[1]));
996 if (baton->hunk_length[1] != 1)
998 SVN_ERR(svn_stream_printf_from_utf8(baton->output_stream,
999 baton->header_encoding,
1000 baton->pool, ",%" APR_OFF_T_FMT,
1001 baton->hunk_length[1]));
1004 SVN_ERR(svn_stream_printf_from_utf8(baton->output_stream,
1005 baton->header_encoding,
1006 baton->pool, " @@%s%s" APR_EOL_STR,
1007 baton->hunk_extra_context[0]
1008 ? " " : "",
1009 baton->hunk_extra_context));
1011 /* Output the hunk content */
1012 hunk_len = baton->hunk->len;
1013 SVN_ERR(svn_stream_write(baton->output_stream, baton->hunk->data,
1014 &hunk_len));
1016 /* Prepare for the next hunk */
1017 baton->hunk_length[0] = 0;
1018 baton->hunk_length[1] = 0;
1019 svn_stringbuf_setempty(baton->hunk);
1021 return SVN_NO_ERROR;
1024 static svn_error_t *
1025 output_unified_diff_modified(void *baton,
1026 apr_off_t original_start, apr_off_t original_length,
1027 apr_off_t modified_start, apr_off_t modified_length,
1028 apr_off_t latest_start, apr_off_t latest_length)
1030 svn_diff__file_output_baton_t *output_baton = baton;
1031 apr_off_t target_line[2];
1032 int i;
1034 target_line[0] = original_start >= SVN_DIFF__UNIFIED_CONTEXT_SIZE
1035 ? original_start - SVN_DIFF__UNIFIED_CONTEXT_SIZE : 0;
1036 target_line[1] = modified_start;
1038 /* If the changed ranges are far enough apart (no overlapping or connecting
1039 context), flush the current hunk, initialize the next hunk and skip the
1040 lines not in context. Also do this when this is the first hunk.
1042 if (output_baton->current_line[0] < target_line[0]
1043 && (output_baton->hunk_start[0] + output_baton->hunk_length[0]
1044 + SVN_DIFF__UNIFIED_CONTEXT_SIZE < target_line[0]
1045 || output_baton->hunk_length[0] == 0))
1047 SVN_ERR(output_unified_flush_hunk(output_baton));
1049 output_baton->hunk_start[0] = target_line[0];
1050 output_baton->hunk_start[1] = target_line[1] + target_line[0]
1051 - original_start;
1053 /* Skip lines until we are at the beginning of the context we want to
1054 display */
1055 while (output_baton->current_line[0] < target_line[0])
1057 SVN_ERR(output_unified_line(output_baton,
1058 svn_diff__file_output_unified_skip, 0));
1061 if (output_baton->show_c_function)
1063 int p;
1065 /* Save the extra context for later use.
1066 * Note that the last byte of the hunk_extra_context array is never
1067 * touched after it is zero-initialized, so the array is always
1068 * 0-terminated. */
1069 strncpy(output_baton->hunk_extra_context,
1070 output_baton->extra_context->data,
1071 SVN_DIFF__EXTRA_CONTEXT_LENGTH);
1072 /* Trim whitespace at the end, most notably to get rid of any
1073 * newline characters. */
1074 p = strlen(output_baton->hunk_extra_context);
1075 while (p > 0
1076 && svn_ctype_isspace(output_baton->hunk_extra_context[p - 1]))
1078 output_baton->hunk_extra_context[--p] = '\0';
1083 /* Skip lines until we are at the start of the changed range */
1084 while (output_baton->current_line[1] < target_line[1])
1086 SVN_ERR(output_unified_line(output_baton,
1087 svn_diff__file_output_unified_skip, 1));
1090 /* Output the context preceding the changed range */
1091 while (output_baton->current_line[0] < original_start)
1093 SVN_ERR(output_unified_line(output_baton,
1094 svn_diff__file_output_unified_context, 0));
1097 target_line[0] = original_start + original_length;
1098 target_line[1] = modified_start + modified_length;
1100 /* Output the changed range */
1101 for (i = 0; i < 2; i++)
1103 while (output_baton->current_line[i] < target_line[i])
1105 SVN_ERR(output_unified_line
1106 (output_baton,
1107 i == 0 ? svn_diff__file_output_unified_delete
1108 : svn_diff__file_output_unified_insert, i));
1112 return SVN_NO_ERROR;
1115 /* Set *HEADER to a new string consisting of PATH, a tab, and PATH's mtime. */
1116 static svn_error_t *
1117 output_unified_default_hdr(const char **header, const char *path,
1118 apr_pool_t *pool)
1120 apr_finfo_t file_info;
1121 apr_time_exp_t exploded_time;
1122 char time_buffer[64];
1123 apr_size_t time_len;
1125 SVN_ERR(svn_io_stat(&file_info, path, APR_FINFO_MTIME, pool));
1126 apr_time_exp_lt(&exploded_time, file_info.mtime);
1128 apr_strftime(time_buffer, &time_len, sizeof(time_buffer) - 1,
1129 "%a %b %e %H:%M:%S %Y", &exploded_time);
1131 *header = apr_psprintf(pool, "%s\t%s", path, time_buffer);
1133 return SVN_NO_ERROR;
1136 static const svn_diff_output_fns_t svn_diff__file_output_unified_vtable =
1138 NULL, /* output_common */
1139 output_unified_diff_modified,
1140 NULL, /* output_diff_latest */
1141 NULL, /* output_diff_common */
1142 NULL /* output_conflict */
1145 svn_error_t *
1146 svn_diff_file_output_unified3(svn_stream_t *output_stream,
1147 svn_diff_t *diff,
1148 const char *original_path,
1149 const char *modified_path,
1150 const char *original_header,
1151 const char *modified_header,
1152 const char *header_encoding,
1153 const char *relative_to_dir,
1154 svn_boolean_t show_c_function,
1155 apr_pool_t *pool)
1157 svn_diff__file_output_baton_t baton;
1158 int i;
1160 if (svn_diff_contains_diffs(diff))
1162 const char **c;
1164 memset(&baton, 0, sizeof(baton));
1165 baton.output_stream = output_stream;
1166 baton.pool = pool;
1167 baton.header_encoding = header_encoding;
1168 baton.path[0] = original_path;
1169 baton.path[1] = modified_path;
1170 baton.hunk = svn_stringbuf_create("", pool);
1171 baton.show_c_function = show_c_function;
1172 baton.extra_context = svn_stringbuf_create("", pool);
1173 baton.extra_skip_match = apr_array_make(pool, 3, sizeof(char **));
1175 c = apr_array_push(baton.extra_skip_match);
1176 *c = "public:*";
1177 c = apr_array_push(baton.extra_skip_match);
1178 *c = "private:*";
1179 c = apr_array_push(baton.extra_skip_match);
1180 *c = "protected:*";
1182 SVN_ERR(svn_utf_cstring_from_utf8_ex2(&baton.context_str, " ",
1183 header_encoding, pool));
1184 SVN_ERR(svn_utf_cstring_from_utf8_ex2(&baton.delete_str, "-",
1185 header_encoding, pool));
1186 SVN_ERR(svn_utf_cstring_from_utf8_ex2(&baton.insert_str, "+",
1187 header_encoding, pool));
1189 if (relative_to_dir)
1191 /* Possibly adjust the "original" and "modified" paths shown in
1192 the output (see issue #2723). */
1193 const char *child_path;
1195 if (! original_header)
1197 child_path = svn_path_is_child(relative_to_dir,
1198 original_path, pool);
1199 if (child_path)
1200 original_path = child_path;
1201 else
1202 return svn_error_createf(SVN_ERR_BAD_RELATIVE_PATH, NULL,
1203 _("Path '%s' must be an immediate child of "
1204 "the directory '%s'"),
1205 original_path, relative_to_dir);
1208 if (! modified_header)
1210 child_path = svn_path_is_child(relative_to_dir, modified_path, pool);
1211 if (child_path)
1212 modified_path = child_path;
1213 else
1214 return svn_error_createf(SVN_ERR_BAD_RELATIVE_PATH, NULL,
1215 _("Path '%s' must be an immediate child of "
1216 "the directory '%s'"),
1217 modified_path, relative_to_dir);
1221 for (i = 0; i < 2; i++)
1223 SVN_ERR(svn_io_file_open(&baton.file[i], baton.path[i],
1224 APR_READ, APR_OS_DEFAULT, pool));
1227 if (original_header == NULL)
1229 SVN_ERR(output_unified_default_hdr
1230 (&original_header, original_path, pool));
1233 if (modified_header == NULL)
1235 SVN_ERR(output_unified_default_hdr
1236 (&modified_header, modified_path, pool));
1239 SVN_ERR(svn_stream_printf_from_utf8(output_stream, header_encoding, pool,
1240 "--- %s" APR_EOL_STR
1241 "+++ %s" APR_EOL_STR,
1242 original_header, modified_header));
1244 SVN_ERR(svn_diff_output(diff, &baton,
1245 &svn_diff__file_output_unified_vtable));
1246 SVN_ERR(output_unified_flush_hunk(&baton));
1248 for (i = 0; i < 2; i++)
1250 SVN_ERR(svn_io_file_close(baton.file[i], pool));
1254 return SVN_NO_ERROR;
1257 svn_error_t *
1258 svn_diff_file_output_unified2(svn_stream_t *output_stream,
1259 svn_diff_t *diff,
1260 const char *original_path,
1261 const char *modified_path,
1262 const char *original_header,
1263 const char *modified_header,
1264 const char *header_encoding,
1265 apr_pool_t *pool)
1267 return svn_diff_file_output_unified3(output_stream, diff,
1268 original_path, modified_path,
1269 original_header, modified_header,
1270 header_encoding, NULL, FALSE, pool);
1273 svn_error_t *
1274 svn_diff_file_output_unified(svn_stream_t *output_stream,
1275 svn_diff_t *diff,
1276 const char *original_path,
1277 const char *modified_path,
1278 const char *original_header,
1279 const char *modified_header,
1280 apr_pool_t *pool)
1282 return svn_diff_file_output_unified2(output_stream, diff,
1283 original_path, modified_path,
1284 original_header, modified_header,
1285 SVN_APR_LOCALE_CHARSET, pool);
1289 /** Display diff3 **/
1291 typedef struct svn_diff3__file_output_baton_t
1293 svn_stream_t *output_stream;
1295 const char *path[3];
1297 apr_off_t current_line[3];
1299 char *buffer[3];
1300 char *endp[3];
1301 char *curp[3];
1303 /* The following four members are in the encoding used for the output. */
1304 const char *conflict_modified;
1305 const char *conflict_original;
1306 const char *conflict_separator;
1307 const char *conflict_latest;
1309 svn_boolean_t display_original_in_conflict;
1310 svn_boolean_t display_resolved_conflicts;
1312 apr_pool_t *pool;
1313 } svn_diff3__file_output_baton_t;
/* What output_line() does with the line it consumes. */
typedef enum svn_diff3__file_output_type_e
{
  /* Advance past the line without writing it. */
  svn_diff3__file_output_skip,

  /* Write the line to the output stream. */
  svn_diff3__file_output_normal
} svn_diff3__file_output_type_e;
1322 static svn_error_t *
1323 output_line(svn_diff3__file_output_baton_t *baton,
1324 svn_diff3__file_output_type_e type, int idx)
1326 char *curp;
1327 char *endp;
1328 char *eol;
1329 apr_size_t len;
1331 curp = baton->curp[idx];
1332 endp = baton->endp[idx];
1334 /* Lazily update the current line even if we're at EOF.
1336 baton->current_line[idx]++;
1338 if (curp == endp)
1339 return SVN_NO_ERROR;
1341 eol = find_eol_start(curp, endp - curp);
1342 if (!eol)
1343 eol = endp;
1344 else
1346 svn_boolean_t had_cr = (*eol == '\r');
1347 eol++;
1348 if (had_cr && eol != endp && *eol == '\n')
1349 eol++;
1352 if (type != svn_diff3__file_output_skip)
1354 len = eol - curp;
1355 SVN_ERR(svn_stream_write(baton->output_stream, curp, &len));
1358 baton->curp[idx] = eol;
1360 return SVN_NO_ERROR;
1363 static svn_error_t *
1364 output_hunk(void *baton, int idx, apr_off_t target_line,
1365 apr_off_t target_length)
1367 svn_diff3__file_output_baton_t *output_baton = baton;
1369 /* Skip lines until we are at the start of the changed range */
1370 while (output_baton->current_line[idx] < target_line)
1372 SVN_ERR(output_line(output_baton, svn_diff3__file_output_skip, idx));
1375 target_line += target_length;
1377 while (output_baton->current_line[idx] < target_line)
1379 SVN_ERR(output_line(output_baton, svn_diff3__file_output_normal, idx));
1382 return SVN_NO_ERROR;
1385 static svn_error_t *
1386 output_common(void *baton, apr_off_t original_start, apr_off_t original_length,
1387 apr_off_t modified_start, apr_off_t modified_length,
1388 apr_off_t latest_start, apr_off_t latest_length)
1390 return output_hunk(baton, 1, modified_start, modified_length);
1393 static svn_error_t *
1394 output_diff_modified(void *baton,
1395 apr_off_t original_start, apr_off_t original_length,
1396 apr_off_t modified_start, apr_off_t modified_length,
1397 apr_off_t latest_start, apr_off_t latest_length)
1399 return output_hunk(baton, 1, modified_start, modified_length);
1402 static svn_error_t *
1403 output_diff_latest(void *baton,
1404 apr_off_t original_start, apr_off_t original_length,
1405 apr_off_t modified_start, apr_off_t modified_length,
1406 apr_off_t latest_start, apr_off_t latest_length)
1408 return output_hunk(baton, 2, latest_start, latest_length);
1411 static svn_error_t *
1412 output_conflict(void *baton,
1413 apr_off_t original_start, apr_off_t original_length,
1414 apr_off_t modified_start, apr_off_t modified_length,
1415 apr_off_t latest_start, apr_off_t latest_length,
1416 svn_diff_t *diff);
1418 static const svn_diff_output_fns_t svn_diff3__file_output_vtable =
1420 output_common,
1421 output_diff_modified,
1422 output_diff_latest,
1423 output_diff_modified, /* output_diff_common */
1424 output_conflict
1427 static svn_error_t *
1428 output_conflict(void *baton,
1429 apr_off_t original_start, apr_off_t original_length,
1430 apr_off_t modified_start, apr_off_t modified_length,
1431 apr_off_t latest_start, apr_off_t latest_length,
1432 svn_diff_t *diff)
1434 svn_diff3__file_output_baton_t *file_baton = baton;
1435 apr_size_t len;
1437 if (diff && file_baton->display_resolved_conflicts)
1439 return svn_diff_output(diff, baton,
1440 &svn_diff3__file_output_vtable);
1443 len = strlen(file_baton->conflict_modified);
1444 SVN_ERR(svn_stream_write(file_baton->output_stream,
1445 file_baton->conflict_modified,
1446 &len));
1448 SVN_ERR(output_hunk(baton, 1, modified_start, modified_length));
1450 if (file_baton->display_original_in_conflict)
1452 len = strlen(file_baton->conflict_original);
1453 SVN_ERR(svn_stream_write(file_baton->output_stream,
1454 file_baton->conflict_original, &len));
1456 SVN_ERR(output_hunk(baton, 0, original_start, original_length));
1459 len = strlen(file_baton->conflict_separator);
1460 SVN_ERR(svn_stream_write(file_baton->output_stream,
1461 file_baton->conflict_separator, &len));
1463 SVN_ERR(output_hunk(baton, 2, latest_start, latest_length));
1465 len = strlen(file_baton->conflict_latest);
1466 SVN_ERR(svn_stream_write(file_baton->output_stream,
1467 file_baton->conflict_latest, &len));
1469 return SVN_NO_ERROR;
/* Return the first eol marker found in [BUF, ENDP) as a
 * NUL-terminated string ("\n", "\r" or "\r\n"), or NULL if no eol
 * marker is found.
 *
 * If the last valid character of BUF is a CR, which could be the first
 * byte of a two-byte eol sequence, just return "\r", that is, assume
 * BUF represents a CR-only file.  This is correct for callers that
 * pass an entire file at once, and is no more likely to be incorrect
 * than correct for any caller that doesn't.
 */
static const char *
detect_eol(char *buf, char *endp)
{
  const char *first = find_eol_start(buf, endp - buf);

  if (first == NULL)
    return NULL;

  if (*first == '\n')
    return "\n";

  /* We found a CR; it is a CRLF only if an LF follows within bounds. */
  if (first + 1 != endp && first[1] == '\n')
    return "\r\n";

  return "\r";
}
1500 svn_error_t *
1501 svn_diff_file_output_merge(svn_stream_t *output_stream,
1502 svn_diff_t *diff,
1503 const char *original_path,
1504 const char *modified_path,
1505 const char *latest_path,
1506 const char *conflict_original,
1507 const char *conflict_modified,
1508 const char *conflict_latest,
1509 const char *conflict_separator,
1510 svn_boolean_t display_original_in_conflict,
1511 svn_boolean_t display_resolved_conflicts,
1512 apr_pool_t *pool)
1514 svn_diff3__file_output_baton_t baton;
1515 apr_file_t *file[3];
1516 apr_off_t size;
1517 int idx;
1518 #if APR_HAS_MMAP
1519 apr_mmap_t *mm[3] = { 0 };
1520 #endif /* APR_HAS_MMAP */
1521 const char *eol;
1523 memset(&baton, 0, sizeof(baton));
1524 baton.output_stream = output_stream;
1525 baton.pool = pool;
1526 baton.path[0] = original_path;
1527 baton.path[1] = modified_path;
1528 baton.path[2] = latest_path;
1529 SVN_ERR(svn_utf_cstring_from_utf8(&baton.conflict_modified,
1530 conflict_modified ? conflict_modified
1531 : apr_psprintf(pool, "<<<<<<< %s",
1532 modified_path),
1533 pool));
1534 SVN_ERR(svn_utf_cstring_from_utf8(&baton.conflict_original,
1535 conflict_original ? conflict_original
1536 : apr_psprintf(pool, "||||||| %s",
1537 original_path),
1538 pool));
1539 SVN_ERR(svn_utf_cstring_from_utf8(&baton.conflict_separator,
1540 conflict_separator ? conflict_separator
1541 : "=======", pool));
1542 SVN_ERR(svn_utf_cstring_from_utf8(&baton.conflict_latest,
1543 conflict_latest ? conflict_latest
1544 : apr_psprintf(pool, ">>>>>>> %s",
1545 latest_path),
1546 pool));
1548 baton.display_original_in_conflict = display_original_in_conflict;
1549 baton.display_resolved_conflicts = display_resolved_conflicts &&
1550 !display_original_in_conflict;
1552 for (idx = 0; idx < 3; idx++)
1554 SVN_ERR(map_or_read_file(&file[idx],
1555 MMAP_T_ARG(mm[idx])
1556 &baton.buffer[idx], &size,
1557 baton.path[idx], pool));
1559 baton.curp[idx] = baton.buffer[idx];
1560 baton.endp[idx] = baton.buffer[idx];
1562 if (baton.endp[idx])
1563 baton.endp[idx] += size;
1566 /* Check what eol marker we should use for conflict markers.
1567 We use the eol marker of the modified file and fall back on the
1568 platform's eol marker if that file doesn't contain any newlines. */
1569 eol = detect_eol(baton.buffer[1], baton.endp[1]);
1570 if (! eol)
1571 eol = APR_EOL_STR;
1573 /* Extend our conflict markers with the correct eol marker. */
1574 baton.conflict_modified = apr_pstrcat(pool, baton.conflict_modified, eol,
1575 NULL);
1576 baton.conflict_original = apr_pstrcat(pool, baton.conflict_original, eol,
1577 NULL);
1578 baton.conflict_separator = apr_pstrcat(pool, baton.conflict_separator, eol,
1579 NULL);
1580 baton.conflict_latest = apr_pstrcat(pool, baton.conflict_latest, eol,
1581 NULL);
1583 SVN_ERR(svn_diff_output(diff, &baton,
1584 &svn_diff3__file_output_vtable));
1586 for (idx = 0; idx < 3; idx++)
1588 #if APR_HAS_MMAP
1589 if (mm[idx])
1591 apr_status_t rv = apr_mmap_delete(mm[idx]);
1592 if (rv != APR_SUCCESS)
1594 return svn_error_wrap_apr(rv, _("Failed to delete mmap '%s'"),
1595 baton.path[idx]);
1598 #endif /* APR_HAS_MMAP */
1600 if (file[idx])
1602 SVN_ERR(svn_io_file_close(file[idx], pool));
1606 return SVN_NO_ERROR;