Sync usage with man page.
[netbsd-mini2440.git] / gnu / dist / diffutils / src / util.c
blob8d12070e5bde6356fe868301ae6adb385e2245ad
1 /* $NetBSD$ */
3 /* Support routines for GNU DIFF.
5 Copyright (C) 1988, 1989, 1992, 1993, 1994, 1995, 1998, 2001, 2002
6 Free Software Foundation, Inc.
8 This file is part of GNU DIFF.
10 GNU DIFF is free software; you can redistribute it and/or modify
11 it under the terms of the GNU General Public License as published by
12 the Free Software Foundation; either version 2, or (at your option)
13 any later version.
15 GNU DIFF is distributed in the hope that it will be useful,
16 but WITHOUT ANY WARRANTY; without even the implied warranty of
17 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
18 GNU General Public License for more details.
20 You should have received a copy of the GNU General Public License
21 along with this program; see the file COPYING.
22 If not, write to the Free Software Foundation,
23 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. */
25 #include "diff.h"
26 #include <dirname.h>
27 #include <error.h>
28 #include <quotesys.h>
29 #include <regex.h>
30 #include <xalloc.h>
32 char const pr_program[] = PR_PROGRAM;
/* Queue up one-line messages to be printed at the end,
   when -l is specified.  Each message is recorded with a `struct msg'.  */

struct msg
{
  /* Next queued message, or null for the last one.  */
  struct msg *next;

  /* Format + 4 args, each '\0' terminated, concatenated.  The array is
     declared with one element, but message5 sizes each allocation from
     offsetof (struct msg, args) plus the total length of the strings.  */
  char args[1];
};

/* Head of the chain of queued messages.  */

static struct msg *msg_chain;

/* Tail of the chain of queued messages: the address of the link that
   the next queued message is stored through.  */

static struct msg **msg_chain_end = &msg_chain;
/* Use when a system call returns non-zero status.
   NAME should normally be the file name.

   Reports NAME together with the current value of errno through
   error (), with a zero exit status argument.  */

void
perror_with_name (char const *name)
{
  error (0, errno, "%s", name);
}
60 /* Use when a system call returns non-zero status and that is fatal. */
62 void
63 pfatal_with_name (char const *name)
65 int e = errno;
66 print_message_queue ();
67 error (EXIT_TROUBLE, e, "%s", name);
68 abort ();
71 /* Print an error message containing MSGID, then exit. */
73 void
74 fatal (char const *msgid)
76 print_message_queue ();
77 error (EXIT_TROUBLE, 0, "%s", _(msgid));
78 abort ();
/* Like printf, except if -l in effect then save the message and print later.
   This is used for things like "Only in ...".

   Two-argument convenience form: forwards to message5 with the third
   and fourth arguments null.  */

void
message (char const *format_msgid, char const *arg1, char const *arg2)
{
  message5 (format_msgid, arg1, arg2, 0, 0);
}
90 void
91 message5 (char const *format_msgid, char const *arg1, char const *arg2,
92 char const *arg3, char const *arg4)
94 if (paginate)
96 char *p;
97 char const *arg[5];
98 int i;
99 size_t size[5];
100 size_t total_size = offsetof (struct msg, args);
101 struct msg *new;
103 arg[0] = format_msgid;
104 arg[1] = arg1;
105 arg[2] = arg2;
106 arg[3] = arg3 ? arg3 : "";
107 arg[4] = arg4 ? arg4 : "";
109 for (i = 0; i < 5; i++)
110 total_size += size[i] = strlen (arg[i]) + 1;
112 new = xmalloc (total_size);
114 for (i = 0, p = new->args; i < 5; p += size[i++])
115 memcpy (p, arg[i], size[i]);
117 *msg_chain_end = new;
118 new->next = 0;
119 msg_chain_end = &new->next;
121 else
123 if (sdiff_merge_assist)
124 putchar (' ');
125 printf (_(format_msgid), arg1, arg2, arg3, arg4);
129 /* Output all the messages that were saved up by calls to `message'. */
131 void
132 print_message_queue (void)
134 char const *arg[5];
135 int i;
136 struct msg *m = msg_chain;
138 while (m)
140 struct msg *next = m->next;
141 arg[0] = m->args;
142 for (i = 0; i < 4; i++)
143 arg[i + 1] = arg[i] + strlen (arg[i]) + 1;
144 printf (_(arg[0]), arg[1], arg[2], arg[3], arg[4]);
145 free (m);
146 m = next;
150 /* Call before outputting the results of comparing files NAME0 and NAME1
151 to set up OUTFILE, the stdio stream for the output to go to.
153 Usually, OUTFILE is just stdout. But when -l was specified
154 we fork off a `pr' and make OUTFILE a pipe to it.
155 `pr' then outputs to our stdout. */
157 static char const *current_name0;
158 static char const *current_name1;
159 static bool currently_recursive;
161 void
162 setup_output (char const *name0, char const *name1, bool recursive)
164 current_name0 = name0;
165 current_name1 = name1;
166 currently_recursive = recursive;
167 outfile = 0;
170 #if HAVE_WORKING_FORK || HAVE_WORKING_VFORK
171 static pid_t pr_pid;
172 #endif
174 void
175 begin_output (void)
177 char *name;
179 if (outfile != 0)
180 return;
182 /* Construct the header of this piece of diff. */
183 name = xmalloc (strlen (current_name0) + strlen (current_name1)
184 + strlen (switch_string) + 7);
186 /* POSIX 1003.1-2001 specifies this format. But there are some bugs in
187 the standard: it says that we must print only the last component
188 of the pathnames, and it requires two spaces after "diff" if
189 there are no options. These requirements are silly and do not
190 match historical practice. */
191 sprintf (name, "diff%s %s %s", switch_string, current_name0, current_name1);
193 if (paginate)
195 if (fflush (stdout) != 0)
196 pfatal_with_name (_("write failed"));
198 /* Make OUTFILE a pipe to a subsidiary `pr'. */
200 #if HAVE_WORKING_FORK || HAVE_WORKING_VFORK
201 int pipes[2];
203 if (pipe (pipes) != 0)
204 pfatal_with_name ("pipe");
206 pr_pid = vfork ();
207 if (pr_pid < 0)
208 pfatal_with_name ("fork");
210 if (pr_pid == 0)
212 close (pipes[1]);
213 if (pipes[0] != STDIN_FILENO)
215 if (dup2 (pipes[0], STDIN_FILENO) < 0)
216 pfatal_with_name ("dup2");
217 close (pipes[0]);
220 execl (pr_program, pr_program, "-h", name, 0);
221 _exit (errno == ENOEXEC ? 126 : 127);
223 else
225 close (pipes[0]);
226 outfile = fdopen (pipes[1], "w");
227 if (!outfile)
228 pfatal_with_name ("fdopen");
230 #else
231 char *command = xmalloc (sizeof pr_program - 1 + 7
232 + quote_system_arg ((char *) 0, name) + 1);
233 char *p;
234 sprintf (command, "%s -f -h ", pr_program);
235 p = command + sizeof pr_program - 1 + 7;
236 p += quote_system_arg (p, name);
237 *p = 0;
238 errno = 0;
239 outfile = popen (command, "w");
240 if (!outfile)
241 pfatal_with_name (command);
242 free (command);
243 #endif
246 else
249 /* If -l was not specified, output the diff straight to `stdout'. */
251 outfile = stdout;
253 /* If handling multiple files (because scanning a directory),
254 print which files the following output is about. */
255 if (currently_recursive)
256 printf ("%s\n", name);
259 free (name);
261 /* A special header is needed at the beginning of context output. */
262 switch (output_style)
264 case OUTPUT_CONTEXT:
265 print_context_header (files, 0);
266 break;
268 case OUTPUT_UNIFIED:
269 print_context_header (files, 1);
270 break;
272 default:
273 break;
277 /* Call after the end of output of diffs for one file.
278 Close OUTFILE and get rid of the `pr' subfork. */
280 void
281 finish_output (void)
283 if (outfile != 0 && outfile != stdout)
285 int wstatus;
286 int werrno = 0;
287 if (ferror (outfile))
288 fatal ("write failed");
289 #if ! (HAVE_WORKING_FORK || HAVE_WORKING_VFORK)
290 wstatus = pclose (outfile);
291 if (wstatus == -1)
292 werrno = errno;
293 #else
294 if (fclose (outfile) != 0)
295 pfatal_with_name (_("write failed"));
296 if (waitpid (pr_pid, &wstatus, 0) < 0)
297 pfatal_with_name ("waitpid");
298 #endif
299 if (! werrno && WIFEXITED (wstatus) && WEXITSTATUS (wstatus) == 127)
300 error (EXIT_TROUBLE, 0, _("subsidiary program `%s' not found"),
301 pr_program);
302 if (wstatus != 0)
303 error (EXIT_TROUBLE, werrno, _("subsidiary program `%s' failed"),
304 pr_program);
307 outfile = 0;
310 /* Compare two lines (typically one from each input file)
311 according to the command line options.
312 For efficiency, this is invoked only when the lines do not match exactly
313 but an option like -i might cause us to ignore the difference.
314 Return nonzero if the lines differ. */
316 bool
317 lines_differ (char const *s1, char const *s2)
319 register unsigned char const *t1 = (unsigned char const *) s1;
320 register unsigned char const *t2 = (unsigned char const *) s2;
321 size_t column = 0;
323 while (1)
325 register unsigned char c1 = *t1++;
326 register unsigned char c2 = *t2++;
328 /* Test for exact char equality first, since it's a common case. */
329 if (c1 != c2)
331 switch (ignore_white_space)
333 case IGNORE_ALL_SPACE:
334 /* For -w, just skip past any white space. */
335 while (ISSPACE (c1) && c1 != '\n') c1 = *t1++;
336 while (ISSPACE (c2) && c2 != '\n') c2 = *t2++;
337 break;
339 case IGNORE_SPACE_CHANGE:
340 /* For -b, advance past any sequence of white space in
341 line 1 and consider it just one space, or nothing at
342 all if it is at the end of the line. */
343 if (ISSPACE (c1))
345 while (c1 != '\n')
347 c1 = *t1++;
348 if (! ISSPACE (c1))
350 --t1;
351 c1 = ' ';
352 break;
357 /* Likewise for line 2. */
358 if (ISSPACE (c2))
360 while (c2 != '\n')
362 c2 = *t2++;
363 if (! ISSPACE (c2))
365 --t2;
366 c2 = ' ';
367 break;
372 if (c1 != c2)
374 /* If we went too far when doing the simple test
375 for equality, go back to the first non-white-space
376 character in both sides and try again. */
377 if (c2 == ' ' && c1 != '\n'
378 && (unsigned char const *) s1 + 1 < t1
379 && ISSPACE (t1[-2]))
381 --t1;
382 continue;
384 if (c1 == ' ' && c2 != '\n'
385 && (unsigned char const *) s2 + 1 < t2
386 && ISSPACE (t2[-2]))
388 --t2;
389 continue;
393 break;
395 case IGNORE_TAB_EXPANSION:
396 if ((c1 == ' ' && c2 == '\t')
397 || (c1 == '\t' && c2 == ' '))
399 size_t column2 = column;
400 for (;; c1 = *t1++)
402 if (c1 == ' ')
403 column++;
404 else if (c1 == '\t')
405 column += TAB_WIDTH - column % TAB_WIDTH;
406 else
407 break;
409 for (;; c2 = *t2++)
411 if (c2 == ' ')
412 column2++;
413 else if (c2 == '\t')
414 column2 += TAB_WIDTH - column2 % TAB_WIDTH;
415 else
416 break;
418 if (column != column2)
419 return 1;
421 break;
423 case IGNORE_NO_WHITE_SPACE:
424 break;
427 /* Lowercase all letters if -i is specified. */
429 if (ignore_case)
431 c1 = TOLOWER (c1);
432 c2 = TOLOWER (c2);
435 if (c1 != c2)
436 break;
438 if (c1 == '\n')
439 return 0;
441 column += c1 == '\t' ? TAB_WIDTH - column % TAB_WIDTH : 1;
444 return 1;
/* Find the consecutive changes at the start of the script START.
   Return the last link before the first gap.

   This implementation returns START itself, so when it is used as the
   HUNKFUN of print_script every change becomes a hunk of its own.  */

struct change *
find_change (struct change *start)
{
  return start;
}
/* Counterpart of find_change with the same signature; it likewise
   returns START unchanged.  NOTE(review): presumably meant for scripts
   stored in reverse order -- confirm against the callers.  */

struct change *
find_reverse_change (struct change *start)
{
  return start;
}
462 /* Divide SCRIPT into pieces by calling HUNKFUN and
463 print each piece with PRINTFUN.
464 Both functions take one arg, an edit script.
466 HUNKFUN is called with the tail of the script
467 and returns the last link that belongs together with the start
468 of the tail.
470 PRINTFUN takes a subscript which belongs together (with a null
471 link at the end) and prints it. */
473 void
474 print_script (struct change *script,
475 struct change * (*hunkfun) (struct change *),
476 void (*printfun) (struct change *))
478 struct change *next = script;
480 while (next)
482 struct change *this, *end;
484 /* Find a set of changes that belong together. */
485 this = next;
486 end = (*hunkfun) (next);
488 /* Disconnect them from the rest of the changes,
489 making them a hunk, and remember the rest for next iteration. */
490 next = end->link;
491 end->link = 0;
492 #ifdef DEBUG
493 debug_script (this);
494 #endif
496 /* Print this hunk. */
497 (*printfun) (this);
499 /* Reconnect the script so it will all be freed properly. */
500 end->link = next;
504 /* Print the text of a single line LINE,
505 flagging it with the characters in LINE_FLAG (which say whether
506 the line is inserted, deleted, changed, etc.). */
508 void
509 print_1_line (char const *line_flag, char const *const *line)
511 char const *base = line[0], *limit = line[1]; /* Help the compiler. */
512 FILE *out = outfile; /* Help the compiler some more. */
513 char const *flag_format = 0;
515 /* If -T was specified, use a Tab between the line-flag and the text.
516 Otherwise use a Space (as Unix diff does).
517 Print neither space nor tab if line-flags are empty. */
519 if (line_flag && *line_flag)
521 flag_format = initial_tab ? "%s\t" : "%s ";
522 fprintf (out, flag_format, line_flag);
525 output_1_line (base, limit, flag_format, line_flag);
527 if ((!line_flag || line_flag[0]) && limit[-1] != '\n')
528 fprintf (out, "\n\\ %s\n", _("No newline at end of file"));
531 /* Output a line from BASE up to LIMIT.
532 With -t, expand white space characters to spaces, and if FLAG_FORMAT
533 is nonzero, output it with argument LINE_FLAG after every
534 internal carriage return, so that tab stops continue to line up. */
536 void
537 output_1_line (char const *base, char const *limit, char const *flag_format,
538 char const *line_flag)
540 if (!expand_tabs)
541 fwrite (base, limit - base, 1, outfile);
542 else
544 register FILE *out = outfile;
545 register unsigned char c;
546 register char const *t = base;
547 register unsigned int column = 0;
549 while (t < limit)
550 switch ((c = *t++))
552 case '\t':
554 unsigned int spaces = TAB_WIDTH - column % TAB_WIDTH;
555 column += spaces;
557 putc (' ', out);
558 while (--spaces);
560 break;
562 case '\r':
563 putc (c, out);
564 if (flag_format && t < limit && *t != '\n')
565 fprintf (out, flag_format, line_flag);
566 column = 0;
567 break;
569 case '\b':
570 if (column == 0)
571 continue;
572 column--;
573 putc (c, out);
574 break;
576 default:
577 if (ISPRINT (c))
578 column++;
579 putc (c, out);
580 break;
/* Letters 'd', 'a' and 'c' at indices 1, 2 and 3; index 0 holds 0.
   NOTE(review): presumably indexed by `enum changes' (UNCHANGED, OLD,
   NEW, CHANGED) -- confirm against the enum's declaration.  */
char const change_letter[] = { 0, 'd', 'a', 'c' };
587 /* Translate an internal line number (an index into diff's table of lines)
588 into an actual line number in the input file.
589 The internal line number is I. FILE points to the data on the file.
591 Internal line numbers count from 0 starting after the prefix.
592 Actual line numbers count from 1 within the entire file. */
595 translate_line_number (struct file_data const *file, lin i)
597 return i + file->prefix_lines + 1;
600 /* Translate a line number range. This is always done for printing,
601 so for convenience translate to long rather than lin, so that the
602 caller can use printf with "%ld" without casting. */
604 void
605 translate_range (struct file_data const *file,
606 lin a, lin b,
607 long *aptr, long *bptr)
609 *aptr = translate_line_number (file, a - 1) + 1;
610 *bptr = translate_line_number (file, b + 1) - 1;
613 /* Print a pair of line numbers with SEPCHAR, translated for file FILE.
614 If the two numbers are identical, print just one number.
616 Args A and B are internal line numbers.
617 We print the translated (real) line numbers. */
619 void
620 print_number_range (char sepchar, struct file_data *file, lin a, lin b)
622 long trans_a, trans_b;
623 translate_range (file, a, b, &trans_a, &trans_b);
625 /* Note: we can have B < A in the case of a range of no lines.
626 In this case, we should print the line number before the range,
627 which is B. */
628 if (trans_b > trans_a)
629 fprintf (outfile, "%ld%c%ld", trans_a, sepchar, trans_b);
630 else
631 fprintf (outfile, "%ld", trans_b);
634 /* Look at a hunk of edit script and report the range of lines in each file
635 that it applies to. HUNK is the start of the hunk, which is a chain
636 of `struct change'. The first and last line numbers of file 0 are stored in
637 *FIRST0 and *LAST0, and likewise for file 1 in *FIRST1 and *LAST1.
638 Note that these are internal line numbers that count from 0.
640 If no lines from file 0 are deleted, then FIRST0 is LAST0+1.
642 Return UNCHANGED if only ignorable lines are inserted or deleted,
643 OLD if lines of file 0 are deleted,
644 NEW if lines of file 1 are inserted,
645 and CHANGED if both kinds of changes are found. */
647 enum changes
648 analyze_hunk (struct change *hunk,
649 lin *first0, lin *last0,
650 lin *first1, lin *last1)
652 struct change *next;
653 lin l0, l1;
654 lin show_from, show_to;
655 lin i;
656 bool trivial = ignore_blank_lines || ignore_regexp.fastmap;
657 size_t trivial_length = (int) ignore_blank_lines - 1;
658 /* If 0, ignore zero-length lines;
659 if SIZE_MAX, do not ignore lines just because of their length. */
661 char const * const *linbuf0 = files[0].linbuf; /* Help the compiler. */
662 char const * const *linbuf1 = files[1].linbuf;
664 show_from = show_to = 0;
666 *first0 = hunk->line0;
667 *first1 = hunk->line1;
669 next = hunk;
672 l0 = next->line0 + next->deleted - 1;
673 l1 = next->line1 + next->inserted - 1;
674 show_from += next->deleted;
675 show_to += next->inserted;
677 for (i = next->line0; i <= l0 && trivial; i++)
679 char const *line = linbuf0[i];
680 size_t len = linbuf0[i + 1] - line - 1;
681 if (len != trivial_length
682 && (! ignore_regexp.fastmap
683 || re_search (&ignore_regexp, line, len, 0, len, 0) < 0))
684 trivial = 0;
687 for (i = next->line1; i <= l1 && trivial; i++)
689 char const *line = linbuf1[i];
690 size_t len = linbuf1[i + 1] - line - 1;
691 if (len != trivial_length
692 && (! ignore_regexp.fastmap
693 || re_search (&ignore_regexp, line, len, 0, len, 0) < 0))
694 trivial = 0;
697 while ((next = next->link) != 0);
699 *last0 = l0;
700 *last1 = l1;
702 /* If all inserted or deleted lines are ignorable,
703 tell the caller to ignore this hunk. */
705 if (trivial)
706 return UNCHANGED;
708 return (show_from ? OLD : UNCHANGED) | (show_to ? NEW : UNCHANGED);
/* Concatenate three strings, returning a newly malloc'd string.
   The result is obtained from xmalloc and sized exactly for S1, S2 and
   S3 plus the terminator.  */

char *
concat (char const *s1, char const *s2, char const *s3)
{
  char *new = xmalloc (strlen (s1) + strlen (s2) + strlen (s3) + 1);
  sprintf (new, "%s%s%s", s1, s2, s3);
  return new;
}
/* Yield a new block of SIZE bytes, initialized to zero.
   The block is obtained from xmalloc.  */

void *
zalloc (size_t size)
{
  void *p = xmalloc (size);
  memset (p, 0, size);
  return p;
}
731 /* Yield the newly malloc'd pathname
732 of the file in DIR whose filename is FILE. */
734 char *
735 dir_file_pathname (char const *dir, char const *file)
737 char const *base = base_name (dir);
738 bool omit_slash = !*base || base[strlen (base) - 1] == '/';
739 return concat (dir, "/" + omit_slash, file);
742 void
743 debug_script (struct change *sp)
745 fflush (stdout);
747 for (; sp; sp = sp->link)
749 long line0 = sp->line0;
750 long line1 = sp->line1;
751 long deleted = sp->deleted;
752 long inserted = sp->inserted;
753 fprintf (stderr, "%3ld %3ld delete %ld insert %ld\n",
754 line0, line1, deleted, inserted);
757 fflush (stderr);