1 /* $NetBSD: sdiff.c,v 1.1 2007/02/18 22:13:43 rmind Exp $ */
2 /* $OpenBSD: sdiff.c,v 1.20 2006/09/19 05:52:23 otto Exp $ */
5 * Written by Raymond Lai <ray@cyth.net>.
10 #include <sys/queue.h>
12 #include <sys/types.h>
33 * Each column must be at least one character wide, plus three
34 * characters between the columns (space, [<|>], space).
38 /* A single diff line. */
40 SIMPLEQ_ENTRY(diffline
) diffentries
;
46 static void astrcat(char **, const char *);
47 static void enqueue(char *, char, char *);
48 static char *mktmpcpy(const char *);
49 static void freediff(struct diffline
*);
50 static void int_usage(void);
51 static int parsecmd(FILE *, FILE *, FILE *);
52 static void printa(FILE *, size_t);
53 static void printc(FILE *, size_t, FILE *, size_t);
54 static void printcol(const char *, size_t *, const size_t);
55 static void printd(FILE *, size_t);
56 static void println(const char *, const char, const char *);
57 static void processq(void);
58 static void prompt(const char *, const char *);
59 __dead
static void usage(void);
60 static char *xfgets(FILE *);
/* Queue of diff lines filled by enqueue() and drained by processq(). */
SIMPLEQ_HEAD(, diffline) diffhead = SIMPLEQ_HEAD_INITIALIZER(diffhead);

size_t line_width;	/* width of a line (two columns and divider) */
size_t width;		/* width of each column */
size_t file1ln;		/* line number of file1 */
size_t file2ln;		/* line number of file2 */
int Iflag = 0;		/* ignore sets matching regexp */
int lflag;		/* print only left column for identical lines */
int sflag;		/* skip identical lines */
FILE *outfile;		/* file to save changes to */
const char *tmpdir;	/* TMPDIR or /tmp */
72 static struct option longopts
[] = {
73 { "text", no_argument
, NULL
, 'a' },
74 { "ignore-blank-lines", no_argument
, NULL
, 'B' },
75 { "ignore-space-change", no_argument
, NULL
, 'b' },
76 { "minimal", no_argument
, NULL
, 'd' },
77 { "ignore-tab-expansion", no_argument
, NULL
, 'E' },
78 { "diff-program", required_argument
, NULL
, 'F' },
79 { "speed-large-files", no_argument
, NULL
, 'H' },
80 { "ignore-matching-lines", required_argument
, NULL
, 'I' },
81 { "left-column", no_argument
, NULL
, 'l' },
82 { "output", required_argument
, NULL
, 'o' },
83 { "strip-trailing-cr", no_argument
, NULL
, 'S' },
84 { "suppress-common-lines", no_argument
, NULL
, 's' },
85 { "expand-tabs", no_argument
, NULL
, 't' },
86 { "ignore-all-space", no_argument
, NULL
, 'W' },
87 { "width", required_argument
, NULL
, 'w' },
92 * Create temporary file if source_file is not a regular file.
93 * Returns temporary file name if one was malloced, NULL if unnecessary.
96 mktmpcpy(const char *source_file
)
104 /* Open input and output. */
105 ifd
= open(source_file
, O_RDONLY
, 0);
106 /* File was opened successfully. */
108 if (fstat(ifd
, &sb
) == -1)
109 err(2, "error getting file status from %s", source_file
);
112 if (S_ISREG(sb
.st_mode
))
115 /* If ``-'' does not exist the user meant stdin. */
116 if (errno
== ENOENT
&& strcmp(source_file
, "-") == 0)
119 err(2, "error opening %s", source_file
);
122 /* Not a regular file, so copy input into temporary file. */
123 if (asprintf(&target_file
, "%s/sdiff.XXXXXXXXXX", tmpdir
) == -1)
125 if ((ofd
= mkstemp(target_file
)) == -1) {
126 warn("error opening %s", target_file
);
129 while ((rcount
= read(ifd
, buf
, sizeof(buf
))) != -1 &&
133 wcount
= write(ofd
, buf
, (size_t)rcount
);
134 if (-1 == wcount
|| rcount
!= wcount
) {
135 warn("error writing to %s", target_file
);
140 warn("error reading from %s", source_file
);
147 return (target_file
);
155 main(int argc
, char **argv
)
157 FILE *diffpipe
, *file1
, *file2
;
158 size_t diffargc
= 0, wflag
= WIDTH
;
159 int ch
, fd
[2], status
;
161 char **diffargv
, *diffprog
= "diff", *filename1
, *filename2
,
162 *tmp1
, *tmp2
, *s1
, *s2
;
165 * Process diff flags.
168 * Allocate memory for diff arguments and NULL.
169 * Each flag has at most one argument, so doubling argc gives an
170 * upper limit of how many diff args can be passed. argv[0],
171 * file1, and file2 won't have arguments so doubling them will
172 * waste some memory; however we need an extra space for the
173 * NULL at the end, so it sort of works out.
175 if (!(diffargv
= malloc(sizeof(char **) * argc
* 2)))
178 /* Add first argument, the program name. */
179 diffargv
[diffargc
++] = diffprog
;
181 while ((ch
= getopt_long(argc
, argv
, "aBbdEHI:ilo:stWw:",
182 longopts
, NULL
)) != -1) {
187 diffargv
[diffargc
++] = "-a";
190 diffargv
[diffargc
++] = "-B";
193 diffargv
[diffargc
++] = "-b";
196 diffargv
[diffargc
++] = "-d";
199 diffargv
[diffargc
++] = "-E";
202 diffargv
[0] = diffprog
= optarg
;
205 diffargv
[diffargc
++] = "-H";
209 diffargv
[diffargc
++] = "-I";
210 diffargv
[diffargc
++] = optarg
;
213 diffargv
[diffargc
++] = "-i";
219 if ((outfile
= fopen(optarg
, "w")) == NULL
)
220 err(2, "could not open: %s", optarg
);
223 diffargv
[diffargc
++] = "--strip-trailing-cr";
229 diffargv
[diffargc
++] = "-t";
232 diffargv
[diffargc
++] = "-w";
235 wflag
= strtonum(optarg
, WIDTH_MIN
,
238 errx(2, "width is %s: %s", errstr
, optarg
);
251 if ((tmpdir
= getenv("TMPDIR")) == NULL
)
258 * Create temporary files for diff and sdiff to share if file1
259 * or file2 are not regular files. This allows sdiff and diff
260 * to read the same inputs if one or both inputs are stdin.
262 * If any temporary files were created, their names would be
263 * saved in tmp1 or tmp2. tmp1 should never equal tmp2.
266 /* file1 and file2 are the same, so copy to same temp file. */
267 if (strcmp(filename1
, filename2
) == 0) {
268 if ((tmp1
= mktmpcpy(filename1
)))
269 filename1
= filename2
= tmp1
;
270 /* Copy file1 and file2 into separate temp files. */
272 if ((tmp1
= mktmpcpy(filename1
)))
274 if ((tmp2
= mktmpcpy(filename2
)))
278 diffargv
[diffargc
++] = filename1
;
279 diffargv
[diffargc
++] = filename2
;
280 /* Add NULL to end of array to indicate end of array. */
281 diffargv
[diffargc
++] = NULL
;
283 /* Subtract column divider and divide by two. */
284 width
= (wflag
- 3) / 2;
285 /* Make sure line_width can fit in size_t. */
286 if (width
> (SIZE_T_MAX
- 3) / 2)
287 errx(2, "width is too large: %zu", width
);
288 line_width
= width
* 2 + 3;
293 switch(pid
= fork()) {
296 /* We don't read from the pipe. */
298 if (dup2(fd
[1], STDOUT_FILENO
) == -1)
299 err(2, "child could not duplicate descriptor");
300 /* Free unused descriptor. */
303 execvp(diffprog
, diffargv
);
304 err(2, "could not execute diff: %s", diffprog
);
306 err(2, "could not fork");
310 /* We don't write to the pipe. */
313 /* Open pipe to diff command. */
314 if ((diffpipe
= fdopen(fd
[0], "r")) == NULL
)
315 err(2, "could not open diff pipe");
316 if ((file1
= fopen(filename1
, "r")) == NULL
)
317 err(2, "could not open %s", filename1
);
318 if ((file2
= fopen(filename2
, "r")) == NULL
)
319 err(2, "could not open %s", filename2
);
321 /* Line numbers start at one. */
322 file1ln
= file2ln
= 1;
324 /* Read and parse diff output. */
325 while (parsecmd(diffpipe
, file1
, file2
) != EOF
)
329 /* Wait for diff to exit. */
330 if (waitpid(pid
, &status
, 0) == -1 || !WIFEXITED(status
) ||
331 WEXITSTATUS(status
) >= 2)
332 err(2, "diff exited abnormally");
334 /* Delete and free unneeded temporary files. */
337 warn("error deleting %s", tmp1
);
340 warn("error deleting %s", tmp2
);
343 filename1
= filename2
= tmp1
= tmp2
= NULL
;
345 /* No more diffs, so print common lines. */
347 while ((s1
= xfgets(file1
)))
348 enqueue(s1
, ' ', NULL
);
354 enqueue(s1
, ' ', s2
);
360 /* Process unmodified lines. */
363 /* Return diff exit status. */
364 return (WEXITSTATUS(status
));
368 * Prints an individual column (left or right), taking into account
369 * that tabs are variable-width. Takes a string, the current column
370 * the cursor is on the screen, and the maximum value of the column.
371 * The column value is updated as we go along.
374 printcol(const char *s
, size_t *col
, const size_t col_max
)
377 for (; *s
&& *col
< col_max
; ++s
) {
383 * If rounding to next multiple of eight causes
384 * an integer overflow, just return.
386 if (*col
> SIZE_T_MAX
- 8)
389 /* Round to next multiple of eight. */
390 new_col
= (*col
/ 8 + 1) * 8;
393 * If printing the tab goes past the column
394 * width, don't print it and just quit.
396 if (new_col
> col_max
)
410 * Prompts user to either choose between two strings or edit one, both,
414 prompt(const char *s1
, const char *s2
)
418 /* Print command prompt. */
421 /* Get user input. */
422 for (; (cmd
= xfgets(stdin
)); free(cmd
)) {
425 /* Skip leading whitespace. */
426 for (p
= cmd
; isspace((int)(*p
)); ++p
)
434 if (eparse(p
, s1
, s2
) == -1)
439 /* Choose left column as-is. */
441 fprintf(outfile
, "%s\n", s1
);
443 /* End of command parsing. */
450 /* Choose right column as-is. */
452 fprintf(outfile
, "%s\n", s2
);
454 /* End of command parsing. */
466 /* Interactive usage help. */
472 /* Prompt user again. */
481 * If there was no error, we received an EOF from stdin, so we
490 * Takes two strings, separated by a column divider. NULL strings are
491 * treated as empty columns. If the divider is the ` ' character, the
492 * second column is not printed (-l flag). In this case, the second
493 * string must be NULL. When the second column is NULL, the divider
494 * does not print the trailing space following the divider character.
496 * Takes into account that tabs can take multiple columns.
499 println(const char *s1
, const char divc
, const char *s2
)
503 /* Print first column. Skips if s1 == NULL. */
506 /* Skip angle bracket and space. */
507 printcol(s1
, &col
, width
);
511 /* Only print left column. */
512 if (divc
== ' ' && !s2
) {
517 /* Otherwise, we pad this column up to width. */
518 for (; col
< width
; ++col
)
522 * Print column divider. If there is no second column, we don't
523 * need to add the space for padding.
526 printf(" %c\n", divc
);
529 printf(" %c ", divc
);
532 /* Skip angle bracket and space. */
533 printcol(s2
, &col
, line_width
);
539 * Reads a line from file and returns as a string. If EOF is reached,
540 * NULL is returned. The returned string must be freed afterwards.
545 const char delim
[3] = {'\0', '\0', '\0'};
548 /* XXX - Is this necessary? */
551 if (!(s
= fparseln(file
, NULL
, NULL
, delim
, 0)) &&
553 err(2, "error reading file");
563 * Parse ed commands from diffpipe and print lines from file1 (lines
564 * to change or delete) or file2 (lines to add or change).
568 parsecmd(FILE *diffpipe
, FILE *file1
, FILE *file2
)
570 size_t file1start
, file1end
, file2start
, file2end
, n
;
571 /* ed command line and pointer to characters in line */
576 /* Read ed command. */
577 if (!(line
= xfgets(diffpipe
)))
581 /* Go to character after line number. */
582 while (isdigit((int)(*p
)))
586 file1start
= strtonum(line
, 0, INT_MAX
, &errstr
);
588 errx(2, "file1 start is %s: %s", errstr
, line
);
590 /* A range is specified for file1. */
594 /* Go to character after file1end. */
595 while (isdigit((int)(*p
)))
599 file1end
= strtonum(q
, 0, INT_MAX
, &errstr
);
601 errx(2, "file1 end is %s: %s", errstr
, line
);
602 if (file1start
> file1end
)
603 errx(2, "invalid line range in file1: %s", line
);
606 file1end
= file1start
;
609 /* Check that cmd is valid. */
610 if (!(cmd
== 'a' || cmd
== 'c' || cmd
== 'd'))
611 errx(2, "ed command not recognized: %c: %s", cmd
, line
);
614 /* Go to character after line number. */
615 while (isdigit((int)(*p
)))
619 file2start
= strtonum(q
, 0, INT_MAX
, &errstr
);
621 errx(2, "file2 start is %s: %s", errstr
, line
);
624 * There should either be a comma signifying a second line
625 * number or the line should just end here.
627 if (c
!= ',' && c
!= '\0')
628 errx(2, "invalid line range in file2: %c: %s", c
, line
);
632 file2end
= strtonum(p
, 0, INT_MAX
, &errstr
);
634 errx(2, "file2 end is %s: %s", errstr
, line
);
635 if (file2start
>= file2end
)
636 errx(2, "invalid line range in file2: %s", line
);
638 file2end
= file2start
;
640 /* Appends happen _after_ stated line. */
642 if (file1start
!= file1end
)
643 errx(2, "append cannot have a file1 range: %s",
645 if (file1start
== SIZE_T_MAX
)
646 errx(2, "file1 line range too high: %s", line
);
647 file1start
= ++file1end
;
650 * I'm not sure what the deal is with the line numbers for
653 else if (cmd
== 'd') {
654 if (file2start
!= file2end
)
655 errx(2, "delete cannot have a file2 range: %s",
657 if (file2start
== SIZE_T_MAX
)
658 errx(2, "file2 line range too high: %s", line
);
659 file2start
= ++file2end
;
663 * Continue reading file1 and file2 until we reach line numbers
664 * specified by diff. Should only happen with -I flag.
666 for (; file1ln
< file1start
&& file2ln
< file2start
;
667 ++file1ln
, ++file2ln
) {
670 if (!(s1
= xfgets(file1
)))
671 errx(2, "file1 shorter than expected");
672 if (!(s2
= xfgets(file2
)))
673 errx(2, "file2 shorter than expected");
675 /* If the -l flag was specified, print only left column. */
679 * XXX - If -l and -I are both specified, all
680 * unchanged or ignored lines are shown with a
681 * `(' divider. This matches GNU sdiff, but I
682 * believe it is a bug. Just check out:
683 * gsdiff -l -I '^$' samefile samefile.
686 enqueue(s1
, '(', NULL
);
688 enqueue(s1
, ' ', NULL
);
690 enqueue(s1
, ' ', s2
);
692 /* Ignore deleted lines. */
693 for (; file1ln
< file1start
; ++file1ln
) {
696 if (!(s
= xfgets(file1
)))
697 errx(2, "file1 shorter than expected");
699 enqueue(s
, '(', NULL
);
701 /* Ignore added lines. */
702 for (; file2ln
< file2start
; ++file2ln
) {
705 if (!(s
= xfgets(file2
)))
706 errx(2, "file2 shorter than expected");
708 /* If -l flag was given, don't print right column. */
712 enqueue(NULL
, ')', s
);
715 /* Process unmodified or skipped lines. */
720 printa(file2
, file2end
);
721 n
= file2end
- file2start
+ 1;
725 printc(file1
, file1end
, file2
, file2end
);
726 n
= file1end
- file1start
+ 1 + 1 + file2end
- file2start
+ 1;
730 printd(file1
, file1end
);
731 n
= file1end
- file1start
+ 1;
735 errx(2, "invalid diff command: %c: %s", cmd
, line
);
738 /* Skip to next ed line. */
740 if (!xfgets(diffpipe
))
741 errx(2, "diff ended early");
747 * Queues up a diff line.
750 enqueue(char *left
, char divc
, char *right
)
752 struct diffline
*diffp
;
754 if (!(diffp
= malloc(sizeof(struct diffline
))))
758 diffp
->right
= right
;
759 SIMPLEQ_INSERT_TAIL(&diffhead
, diffp
, diffentries
);
763 * Free a diffline structure and its elements.
766 freediff(struct diffline
*diffp
)
774 * Append second string into first. Repeated appends to the same string
775 * are cached, making this an O(n) function, where n = strlen(append).
778 astrcat(char **s
, const char *append
)
780 /* Length of string in previous run. */
781 static size_t offset
= 0;
784 * String from previous run. Compared to *s to see if we are
785 * dealing with the same string. If so, we can use offset.
787 static const char *oldstr
= NULL
;
792 * First string is NULL, so just copy append.
795 if (!(*s
= strdup(append
)))
798 /* Keep track of string. */
806 * *s is a string so concatenate.
809 /* Did we process the same string in the last run? */
811 * If this is a different string from the one we just processed
819 /* Size = strlen(*s) + \n + strlen(append) + '\0'. */
820 newsiz
= offset
+ 1 + strlen(append
) + 1;
822 /* Resize *s to fit new string. */
823 newstr
= realloc(*s
, newsiz
);
828 /* *s + offset should be end of string. */
830 strlcpy(*s
+ offset
, "\n", newsiz
- offset
);
831 strlcat(*s
+ offset
, append
, newsiz
- offset
);
833 /* New string length should be exactly newsiz - 1 characters. */
834 /* Store generated string's values. */
840 * Process diff set queue, printing, prompting, and saving each diff
841 * line stored in queue.
846 struct diffline
*diffp
;
847 char divc
, *left
, *right
;
849 /* Don't process empty queue. */
850 if (SIMPLEQ_EMPTY(&diffhead
))
853 /* Remember the divider. */
854 divc
= SIMPLEQ_FIRST(&diffhead
)->div
;
859 * Go through set of diffs, concatenating each line in left or
860 * right column into two long strings, `left' and `right'.
862 SIMPLEQ_FOREACH(diffp
, &diffhead
, diffentries
) {
864 * Print changed lines if -s was given,
865 * print all lines if -s was not given.
867 if (!sflag
|| diffp
->div
== '|' || diffp
->div
== '<' ||
869 println(diffp
->left
, diffp
->div
, diffp
->right
);
871 /* Append new lines to diff set. */
873 astrcat(&left
, diffp
->left
);
875 astrcat(&right
, diffp
->right
);
878 /* Empty queue and free each diff line and its elements. */
879 while (!SIMPLEQ_EMPTY(&diffhead
)) {
880 diffp
= SIMPLEQ_FIRST(&diffhead
);
881 SIMPLEQ_REMOVE_HEAD(&diffhead
, diffentries
);
885 /* Write to outfile, prompting user if lines are different. */
888 case ' ': case '(': case ')':
889 fprintf(outfile
, "%s\n", left
);
891 case '|': case '<': case '>':
895 errx(2, "invalid divider: %c", divc
);
898 /* Free left and right. */
904 * Print lines following an (a)ppend command.
907 printa(FILE *file
, size_t line2
)
911 for (; file2ln
<= line2
; ++file2ln
) {
912 if (!(line
= xfgets(file
)))
913 errx(2, "append ended early");
914 enqueue(NULL
, '>', line
);
921 * Print lines following a (c)hange command, from file1ln to file1end
922 * and from file2ln to file2end.
925 printc(FILE *file1
, size_t file1end
, FILE *file2
, size_t file2end
)
928 SIMPLEQ_ENTRY(fileline
) fileentries
;
931 SIMPLEQ_HEAD(, fileline
) delqhead
= SIMPLEQ_HEAD_INITIALIZER(delqhead
);
933 /* Read lines to be deleted. */
934 for (; file1ln
<= file1end
; ++file1ln
) {
935 struct fileline
*linep
;
938 /* Read the next line to be deleted from file1. */
939 if (!(line1
= xfgets(file1
)))
940 errx(2, "error reading file1 in delete in change");
942 /* Add to delete queue. */
943 if (!(linep
= malloc(sizeof(struct fileline
))))
946 SIMPLEQ_INSERT_TAIL(&delqhead
, linep
, fileentries
);
949 /* Process changed lines. */
950 for (; !SIMPLEQ_EMPTY(&delqhead
) && file2ln
<= file2end
;
952 struct fileline
*del
;
956 if (!(add
= xfgets(file2
)))
957 errx(2, "error reading add in change");
959 del
= SIMPLEQ_FIRST(&delqhead
);
960 enqueue(del
->line
, '|', add
);
961 SIMPLEQ_REMOVE_HEAD(&delqhead
, fileentries
);
963 * Free fileline structure but not its elements since
964 * they are queued up.
970 /* Process remaining lines to add. */
971 for (; file2ln
<= file2end
; ++file2ln
) {
975 if (!(add
= xfgets(file2
)))
976 errx(2, "error reading add in change");
978 enqueue(NULL
, '>', add
);
982 /* Process remaining lines to delete. */
983 while (!SIMPLEQ_EMPTY(&delqhead
)) {
984 struct fileline
*filep
;
986 filep
= SIMPLEQ_FIRST(&delqhead
);
987 enqueue(filep
->line
, '<', NULL
);
988 SIMPLEQ_REMOVE_HEAD(&delqhead
, fileentries
);
995 * Print deleted lines from file, from file1ln to file1end.
998 printd(FILE *file1
, size_t file1end
)
1002 /* Print out lines file1ln to file1end. */
1003 for (; file1ln
<= file1end
; ++file1ln
) {
1004 /* XXX - Why can't this handle stdin? */
1005 if (!(line1
= xfgets(file1
)))
1006 errx(2, "file1 ended early in delete");
1007 enqueue(line1
, '<', NULL
);
1013 * Interactive mode usage.
1018 puts("e:\tedit blank diff\n"
1019 "eb:\tedit both diffs concatenated\n"
1020 "el:\tedit left diff\n"
1021 "er:\tedit right diff\n"
1022 "l:\tchoose left diff\n"
1023 "r:\tchoose right diff\n"
1024 "s:\tsilent mode--don't print identical lines\n"
1025 "v:\tverbose mode--print identical lines\n"
1032 extern char *__progname
;
1035 "usage: %s [-abdilstW] [-I regexp] [-o outfile] [-w width] file1 file2\n",