doc: rewrite the "Unusual File Names" section
[diffutils.git] / src / util.c
bloba3dc315ce6578d68d2d8dd9ab90cfdc392594dd5
1 /* Support routines for GNU DIFF.
3 Copyright (C) 1988-1989, 1992-1995, 1998, 2001-2002, 2004, 2006, 2009-2013,
4 2015-2025 Free Software Foundation, Inc.
6 This file is part of GNU DIFF.
8 This program is free software: you can redistribute it and/or modify
9 it under the terms of the GNU General Public License as published by
10 the Free Software Foundation, either version 3 of the License, or
11 (at your option) any later version.
13 This program is distributed in the hope that it will be useful,
14 but WITHOUT ANY WARRANTY; without even the implied warranty of
15 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16 GNU General Public License for more details.
18 You should have received a copy of the GNU General Public License
19 along with this program. If not, see <http://www.gnu.org/licenses/>. */
21 #include "diff.h"
23 #include <argmatch.h>
24 #include <diagnose.h>
25 #include <dirname.h>
26 #include <error.h>
27 #include <flexmember.h>
28 #include <mcel.h>
29 #include <quotearg.h>
30 #include <system-quote.h>
31 #include <xalloc.h>
33 #include <stdarg.h>
34 #include <signal.h>
36 /* Use SA_NOCLDSTOP as a proxy for whether the sigaction machinery is
37 present. */
38 #ifndef SA_NOCLDSTOP
39 # define SA_NOCLDSTOP 0
40 # define sigprocmask(How, Set, Oset) 0
41 # if ! HAVE_SIGINTERRUPT
42 # define siginterrupt(sig, flag) 0
43 # endif
44 #endif
46 #ifndef SA_RESTART
47 # define SA_RESTART 0
48 #endif
49 #ifndef SIGSTOP
50 # define SIGSTOP 0
51 #endif
52 #ifndef SIGTSTP
53 # define SIGTSTP 0
54 #endif
56 char const pr_program[] = PR_PROGRAM;
58 /* Queue up one-line messages to be printed at the end,
59 when -l is specified. Each message is recorded with a 'struct msg'. */
61 struct msg
63 struct msg *next;
65 /* Msgid of printf-style format. */
66 char const *msgid;
68 /* Number of bytes in ARGS. */
69 idx_t argbytes;
71 /* Arg strings, each '\0' terminated, concatenated. */
72 char args[FLEXIBLE_ARRAY_MEMBER];
/* Head of the chain of queued messages.  */

static struct msg *msg_chain;

/* Tail of the chain of queued messages: the address of the link
   where the next queued message will be stored.  */

static struct msg **msg_chain_end = &msg_chain;
/* Report a failed system call without exiting.  The diagnostic
   consists of NAME (normally the file name), quoted, followed by the
   description of the current errno value.  */

void
perror_with_name (char const *name)
{
  int err = errno;
  char const *quoted_name = squote (0, name);
  error (0, err, "%s", quoted_name);
}
92 /* Use when a system call returns non-zero status and that is fatal. */
94 void
95 pfatal_with_name (char const *name)
97 int e = errno;
98 print_message_queue ();
99 error (EXIT_TROUBLE, e, "%s", name);
102 /* Print an error message containing MSGID, then exit. */
104 void
105 fatal (char const *msgid)
107 print_message_queue ();
108 error (EXIT_TROUBLE, 0, "%s", _(msgid));
111 /* Like printf, except if -l in effect then save the message and print later.
112 Also, all arguments must be char * or char const *.
113 This is used for things like "Only in ...". */
115 void
116 message (char const *format_msgid, ...)
118 va_list ap;
119 va_start (ap, format_msgid);
121 if (paginate)
123 idx_t argbytes = 0;
125 for (char const *m = format_msgid; *m; m++)
126 if (*m == '%')
128 if (m[1] == '%')
129 m++;
130 else
131 argbytes += strlen (va_arg (ap, char const *)) + 1;
133 va_end (ap);
135 struct msg *new = xmalloc (FLEXSIZEOF (struct msg, args, argbytes));
136 new->msgid = format_msgid;
137 new->argbytes = argbytes;
139 va_start (ap, format_msgid);
140 char *p = new->args;
141 for (char const *m = format_msgid; *m; m++)
142 if (*m == '%')
144 if (m[1] == '%')
145 m++;
146 else
147 p = stpcpy (p, va_arg (ap, char const *)) + 1;
150 *msg_chain_end = new;
151 new->next = nullptr;
152 msg_chain_end = &new->next;
154 else
156 if (sdiff_merge_assist)
157 putchar (' ');
158 vprintf (_(format_msgid), ap);
161 va_end (ap);
164 /* Output all the messages that were saved up by calls to 'message'. */
166 void
167 print_message_queue (void)
169 for (struct msg *m = msg_chain; m; )
171 /* Change this if diff ever has messages with more than 4 args. */
172 char const *p = m->args;
173 char const *plim = p + m->argbytes;
174 /* Unroll the loop to work around GCC 12 bug with
175 -Wanalyzer-use-of-uninitialized-value. */
176 char const *arg0 = p; p += p < plim ? strlen (p) + 1 : 0;
177 char const *arg1 = p; p += p < plim ? strlen (p) + 1 : 0;
178 char const *arg2 = p; p += p < plim ? strlen (p) + 1 : 0;
179 char const *arg3 = p; p += p < plim ? strlen (p) + 1 : 0;
180 printf (_(m->msgid), arg0, arg1, arg2, arg3);
181 dassert (plim <= p);
182 struct msg *next = m->next;
183 free (m);
184 m = next;
188 /* Signal handling, needed for restoring default colors. */
190 static void
191 xsigaddset (sigset_t *set, int sig)
193 if (sigaddset (set, sig) != 0)
194 pfatal_with_name ("sigaddset");
197 static bool
198 xsigismember (sigset_t const *set, int sig)
200 int mem = sigismember (set, sig);
201 if (mem < 0)
202 pfatal_with_name ("sigismember");
203 assume (mem <= 1);
204 return mem;
/* The type of a signal handling function.  */
typedef void (*signal_handler) (int);

/* Install FUNC as the handler for SIG and return the previous
   handler.  A SIG_ERR result from signal is treated as a fatal
   error.  */
static signal_handler
xsignal (int sig, signal_handler func)
{
  signal_handler previous = signal (sig, func);
  if (previous == SIG_ERR)
    pfatal_with_name ("signal");
  return previous;
}
217 static void
218 xsigprocmask (int how, sigset_t const *restrict set, sigset_t *restrict oset)
220 if (sigprocmask (how, set, oset) != 0)
221 pfatal_with_name ("sigprocmask");
224 /* If true, some signals are caught. This is separate from
225 'caught_signals' because POSIX doesn't require an all-zero sigset_t
226 to be valid. */
227 static bool some_signals_caught;
229 /* The set of signals that are caught. */
230 static sigset_t caught_signals;
232 /* If nonzero, the value of the pending fatal signal. */
233 static sig_atomic_t volatile interrupt_signal;
235 /* A count of the number of pending stop signals that have been received. */
236 static sig_atomic_t volatile stop_signal_count;
238 /* An ordinary signal was received; arrange for the program to exit. */
240 static void
241 sighandler (int sig)
243 if (! SA_NOCLDSTOP)
244 signal (sig, SIG_IGN);
245 if (! interrupt_signal)
246 interrupt_signal = sig;
249 /* A SIGTSTP was received; arrange for the program to suspend itself. */
251 static void
252 stophandler (int sig)
254 if (! SA_NOCLDSTOP)
255 signal (sig, stophandler);
256 if (! interrupt_signal)
257 stop_signal_count++;
259 /* Process any pending signals. If signals are caught, this function
260 should be called periodically. Ideally there should never be an
261 unbounded amount of time when signals are not being processed.
262 Signal handling can restore the default colors, so callers must
263 immediately change colors after invoking this function. */
265 static void
266 process_signals (void)
268 while (interrupt_signal | stop_signal_count)
270 set_color_context (RESET_CONTEXT);
271 fflush (stdout);
273 sigset_t oldset;
274 xsigprocmask (SIG_BLOCK, &caught_signals, &oldset);
276 /* Reload stop_signal_count and (if needed) interrupt_signal, in
277 case a new signal was handled before sigprocmask took effect. */
278 int stops = stop_signal_count, sig;
280 /* SIGTSTP is special, since the application can receive that signal
281 more than once. In this case, don't set the signal handler to the
282 default. Instead, just raise the uncatchable SIGSTOP. */
283 if (stops)
285 stop_signal_count = stops - 1;
286 sig = SIGSTOP;
288 else
290 sig = interrupt_signal;
291 xsignal (sig, SIG_DFL);
294 /* Exit or suspend the program. */
295 if (raise (sig) != 0)
296 pfatal_with_name ("raise");
297 xsigprocmask (SIG_SETMASK, &oldset, nullptr);
299 /* If execution reaches here, then the program has been
300 continued (after being suspended). */
/* The signals that can be caught.  Which of them are actually caught
   is recorded in 'caught_signals'.  */
static int const sig[] =
  {
#if SIGTSTP
    /* Must stay first: is_tstp_index relies on it being at index 0.  */
    SIGTSTP,
#endif

    /* The usual suspects.  */
#if defined SIGALRM
    SIGALRM,
#endif
#if defined SIGHUP
    SIGHUP,
#endif
    SIGINT,
#if defined SIGPIPE
    SIGPIPE,
#endif
#if defined SIGQUIT
    SIGQUIT,
#endif
    SIGTERM,
#if defined SIGPOLL
    SIGPOLL,
#endif
#if defined SIGPROF
    SIGPROF,
#endif
#if defined SIGVTALRM
    SIGVTALRM,
#endif
#if defined SIGXCPU
    SIGXCPU,
#endif
#if defined SIGXFSZ
    SIGXFSZ,
#endif
  };

/* The number of entries in 'sig'.  */
enum { nsigs = sizeof (sig) / sizeof *(sig) };
/* Return true if sig[J] == SIGTSTP.  When SIGTSTP exists (is
   nonzero) it is the first entry of 'sig'; otherwise no index
   qualifies.  */
static bool
is_tstp_index (int j)
{
  if (! SIGTSTP)
    return false;
  return j == 0;
}
353 static void
354 install_signal_handlers (void)
356 if (sigemptyset (&caught_signals) != 0)
357 pfatal_with_name ("sigemptyset");
359 #if SA_NOCLDSTOP
360 for (int j = 0; j < nsigs; j++)
362 struct sigaction actj;
363 if (sigaction (sig[j], nullptr, &actj) == 0 && actj.sa_handler != SIG_IGN)
364 xsigaddset (&caught_signals, sig[j]);
367 struct sigaction act;
368 act.sa_mask = caught_signals;
369 act.sa_flags = SA_RESTART;
371 for (int j = 0; j < nsigs; j++)
372 if (xsigismember (&caught_signals, sig[j]))
374 act.sa_handler = is_tstp_index (j) ? stophandler : sighandler;
375 if (sigaction (sig[j], &act, nullptr) != 0)
376 pfatal_with_name ("sigaction");
377 some_signals_caught = true;
379 #else
380 for (int j = 0; j < nsigs; j++)
382 signal_handler h = signal (sig[j], SIG_IGN);
383 if (h != SIG_IGN && h != SIG_ERR)
385 xsigaddset (&caught_signals, sig[j]);
386 xsignal (sig[j], is_tstp_index (j) ? stophandler : sighandler);
387 some_signals_caught = true;
388 if (siginterrupt (sig[j], 0) != 0)
389 pfatal_with_name ("siginterrupt");
392 #endif
395 /* Clean up signal handlers just before exiting the program. Do this
396 by resetting signal actions back to default, and then processing
397 any signals that arrived before resetting. */
398 void
399 cleanup_signal_handlers (void)
401 if (some_signals_caught)
403 for (int j = 0; j < nsigs; j++)
404 if (xsigismember (&caught_signals, sig[j]))
405 xsignal (sig[j], SIG_DFL);
406 process_signals ();
/* The two file names recorded by setup_output.  */
static char const *current_name[2];

/* The RECURSIVE flag recorded by setup_output.  */
static bool currently_recursive;

/* True if color escape sequences are to be output.  */
static bool colors_enabled;
414 struct bin_str
416 idx_t len; /* Number of bytes */
417 const char *string; /* Pointer to the same */
/* Parse a string as part of the --palette argument; this may involve
   decoding all kinds of escape characters.  If EQUALS_END is set an
   unescaped equal sign ends the string, otherwise only a : or \0
   does.  Return true if successful, false if an escape sequence is
   malformed (a backslash or caret at the end of the string, or a
   caret followed by a character outside '?' and '@'..'~').

   The resulting string is *not* null-terminated, but may contain
   embedded nulls.

   *DEST and *SRC may point into the same string, in which case *DEST
   must not exceed *SRC and the string is modified in place.

   Note that both DEST and SRC are char **; on return they point to
   the first free byte after the array and the character that ended
   the input string, respectively.  */

static bool
get_funky_string (char **dest, const char **src, bool equals_end)
{
  enum {
    ST_GND, ST_BACKSLASH, ST_OCTAL, ST_HEX, ST_CARET, ST_END, ST_ERROR
  } state = ST_GND;

  char const *p = *src;         /* We don't want to double-indirect */
  char *q = *dest;              /* the whole darn time.  */

  /* Accumulator for numerical codes.  It is unsigned so that the
     shifts below never operate on a negative value, and so that
     excess high-order bits of an overlong escape are discarded by
     well-defined truncation.  */
  unsigned char num = 0;

  while (state < ST_END)
    {
      switch (state)
        {
        case ST_GND:            /* Ground state (no escapes) */
          switch (*p)
            {
            case ':':
            case '\0':
              state = ST_END;   /* End of string */
              break;
            case '\\':
              state = ST_BACKSLASH; /* Backslash escape sequence */
              ++p;
              break;
            case '^':
              state = ST_CARET; /* Caret escape */
              ++p;
              break;
            case '=':
              if (equals_end)
                state = ST_END; /* End */
              else
                *(q++) = *(p++); /* An ordinary character here */
              break;
            default:
              *(q++) = *(p++);
              break;
            }
          break;

        case ST_BACKSLASH:      /* Backslash escaped character */
          switch (*p)
            {
            case '0':
            case '1':
            case '2':
            case '3':
            case '4':
            case '5':
            case '6':
            case '7':
              state = ST_OCTAL; /* Octal sequence */
              num = *p - '0';
              break;
            case 'x':
            case 'X':
              state = ST_HEX;   /* Hex sequence */
              num = 0;
              break;
            case 'a':           /* Bell */
              num = '\a';
              break;
            case 'b':           /* Backspace */
              num = '\b';
              break;
            case 'e':           /* Escape */
              num = 27;
              break;
            case 'f':           /* Form feed */
              num = '\f';
              break;
            case 'n':           /* Newline */
              num = '\n';
              break;
            case 'r':           /* Carriage return */
              num = '\r';
              break;
            case 't':           /* Tab */
              num = '\t';
              break;
            case 'v':           /* Vtab */
              num = '\v';
              break;
            case '?':           /* Delete */
              num = 127;
              break;
            case '_':           /* Space */
              num = ' ';
              break;
            case '\0':          /* End of string */
              state = ST_ERROR; /* Error! */
              break;
            default:            /* Escaped character like \ ^ : = */
              num = *p;
              break;
            }
          /* A single-character escape is complete; numeric escapes
             continue in their own states.  */
          if (state == ST_BACKSLASH)
            {
              *(q++) = (char) num;
              state = ST_GND;
            }
          ++p;
          break;

        case ST_OCTAL:          /* Octal sequence */
          if (*p < '0' || *p > '7')
            {
              /* First non-octal character: emit the value and let the
                 ground state look at this character again.  */
              *(q++) = (char) num;
              state = ST_GND;
            }
          else
            num = (num << 3) + (*(p++) - '0');
          break;

        case ST_HEX:            /* Hex sequence */
          switch (*p)
            {
            case '0':
            case '1':
            case '2':
            case '3':
            case '4':
            case '5':
            case '6':
            case '7':
            case '8':
            case '9':
              num = (num << 4) + (*(p++) - '0');
              break;
            case 'a':
            case 'b':
            case 'c':
            case 'd':
            case 'e':
            case 'f':
              num = (num << 4) + (*(p++) - 'a') + 10;
              break;
            case 'A':
            case 'B':
            case 'C':
            case 'D':
            case 'E':
            case 'F':
              num = (num << 4) + (*(p++) - 'A') + 10;
              break;
            default:
              /* First non-hex character: emit the value and let the
                 ground state look at this character again.  */
              *(q++) = (char) num;
              state = ST_GND;
              break;
            }
          break;

        case ST_CARET:          /* Caret escape */
          state = ST_GND;       /* Should be the next state... */
          if (*p >= '@' && *p <= '~')
            {
              *(q++) = *(p++) & 037;
            }
          else if (*p == '?')
            {
              *(q++) = 127;
              /* Consume the '?'.  Without this the ground state would
                 copy it as a literal character after the DEL.  */
              ++p;
            }
          else
            state = ST_ERROR;
          break;

        case ST_END:
        case ST_ERROR:
          /* Not reached: the loop condition excludes these states.  */
          break;
        }
    }

  *dest = q;
  *src = p;

  return state != ST_ERROR;
}
/* The states of the --palette parser in parse_diff_color.  */
enum parse_state
  {
    PS_START = 1,       /* Expecting the first label character.  */
    PS_2,               /* Expecting the second label character.  */
    PS_3,               /* Expecting '=' after an indicator label.  */
    PS_4,               /* Expecting '=' after *.ext.  */
    PS_DONE,            /* The whole string was parsed.  */
    PS_FAIL             /* The string could not be parsed.  */
  };
626 #define LEN_STR_PAIR(s) sizeof (s) - 1, s
628 static struct bin_str color_indicator[] =
630 { LEN_STR_PAIR ("\033[") }, /* lc: Left of color sequence */
631 { LEN_STR_PAIR ("m") }, /* rc: Right of color sequence */
632 { 0, nullptr }, /* ec: End color (replaces lc+rs+rc) */
633 { LEN_STR_PAIR ("0") }, /* rs: Reset to ordinary colors */
634 { LEN_STR_PAIR ("1") }, /* hd: Header */
635 { LEN_STR_PAIR ("32") }, /* ad: Add line */
636 { LEN_STR_PAIR ("31") }, /* de: Delete line */
637 { LEN_STR_PAIR ("36") }, /* ln: Line number */
640 static const char *const indicator_name[] =
642 "lc", "rc", "ec", "rs", "hd", "ad", "de", "ln", nullptr
644 ARGMATCH_VERIFY (indicator_name, color_indicator);
/* The --palette string that has not been parsed yet, or null.  */
static char *color_palette;

/* Set the color palette to PALETTE.  The string must be modifiable:
   it is decoded in place later, when colors are first enabled.  */
void
set_color_palette (char *palette)
{
  color_palette = palette;
}
656 static void
657 parse_diff_color (void)
659 /* Process color_palette into itself. This saves a bit of memory,
660 and pacifies Coverity. The output is no larger than the input. */
661 char *buf = color_palette;
662 char const *p = buf;
663 if (p == nullptr || *p == '\0')
664 return;
665 /* Do not process the color palette twice. */
666 color_palette = nullptr;
668 char label[] = "??"; /* Indicator label */
670 enum parse_state state = PS_START;
671 while (true)
673 switch (state)
675 case PS_START: /* First label character */
676 switch (*p)
678 case ':':
679 ++p;
680 break;
682 case '*':
683 ++p;
684 state = get_funky_string (&buf, &p, true) ? PS_4 : PS_FAIL;
685 break;
687 case '\0':
688 state = PS_DONE; /* Done! */
689 goto done;
691 default: /* Assume it is file type label */
692 label[0] = *(p++);
693 state = PS_2;
694 break;
696 break;
698 case PS_2: /* Second label character */
699 if (*p)
701 label[1] = *(p++);
702 state = PS_3;
704 else
705 state = PS_FAIL; /* Error */
706 break;
708 case PS_3: /* Equal sign after indicator label */
709 state = PS_FAIL; /* Assume failure... */
710 if (*(p++) == '=')/* It *should* be... */
712 for (int ind_no = 0; indicator_name[ind_no] != nullptr; ind_no++)
714 if (STREQ (label, indicator_name[ind_no]))
716 char *str = buf;
717 if (get_funky_string (&buf, &p, false))
719 color_indicator[ind_no].string = str;
720 color_indicator[ind_no].len = buf - str;
721 state = PS_START;
723 break;
726 if (state == PS_FAIL)
727 error (0, 0, _("unrecognized prefix: %s"), label);
729 break;
731 case PS_4: /* Equal sign after *.ext */
732 state = (*p++ == '=' && get_funky_string (&buf, &p, false)
733 ? PS_START : PS_FAIL);
734 break;
736 case PS_FAIL:
737 goto done;
739 default:
740 unreachable ();
743 done:
745 if (state == PS_FAIL)
747 error (0, 0,
748 _("unparsable value for --palette"));
749 colors_enabled = false;
753 static void
754 check_color_output (bool is_pipe)
756 bool output_is_tty;
758 if (! outfile || colors_style == NEVER)
759 return;
761 output_is_tty = presume_output_tty || (!is_pipe && isatty (fileno (outfile)));
763 colors_enabled = (colors_style == ALWAYS
764 || (colors_style == AUTO && output_is_tty));
766 if (colors_enabled)
767 parse_diff_color ();
769 if (output_is_tty)
770 install_signal_handlers ();
773 /* Call before outputting the results of comparing files NAME0 and NAME1
774 to set up OUTFILE, the stdio stream for the output to go to.
776 Usually, OUTFILE is just stdout. But when -l was specified
777 we fork off a 'pr' and make OUTFILE a pipe to it.
778 'pr' then outputs to our stdout. */
780 void
781 setup_output (char const *name0, char const *name1, bool recursive)
783 current_name[0] = name0;
784 current_name[1] = name1;
785 currently_recursive = recursive;
786 outfile = nullptr;
789 #if HAVE_WORKING_FORK
790 static pid_t pr_pid;
791 #endif
794 void
795 begin_output (void)
797 if (outfile)
798 return;
800 char const *names[2];
801 for (int f = 0; f < 2; f++)
802 names[f] = quotearg_n_style (f,
803 (strchr (current_name[f], ' ')
804 ? c_quoting_style : c_maybe_quoting_style),
805 current_name[f]);
807 /* Construct the header of this piece of diff. */
808 /* POSIX 1003.1-2017 specifies this format. But there are some bugs in
809 the standard: it says that we must print only the last component
810 of the pathnames, and it requires two spaces after "diff" if
811 there are no options. These requirements are silly and do not
812 match historical practice. */
813 char *name = xmalloc (sizeof "diff" + strlen (switch_string)
814 + 1 + strlen (names[0]) + 1 + strlen (names[1]));
815 char *p = stpcpy (name, "diff");
816 p = stpcpy (p, switch_string);
817 *p++ = ' ';
818 p = stpcpy (p, names[0]);
819 *p++ = ' ';
820 strcpy (p, names[1]);
822 if (paginate)
824 if (fflush (stdout) != 0)
825 pfatal_with_name (_("write failed"));
827 char const *argv[4] = {pr_program, "-h", name, nullptr };
829 /* Make OUTFILE a pipe to a subsidiary 'pr'. */
830 #if HAVE_WORKING_FORK
831 int pipes[2];
832 if (pipe (pipes) != 0)
833 pfatal_with_name ("pipe");
835 pr_pid = fork ();
836 if (pr_pid < 0)
837 pfatal_with_name ("fork");
839 if (pr_pid == 0)
841 close (pipes[1]);
842 if (pipes[0] != STDIN_FILENO)
844 if (dup2 (pipes[0], STDIN_FILENO) < 0)
845 pfatal_with_name ("dup2");
846 close (pipes[0]);
849 execv (pr_program, (char **) argv);
850 _exit (errno == ENOENT ? 127 : 126);
852 else
854 close (pipes[0]);
855 outfile = fdopen (pipes[1], "w");
856 if (!outfile)
857 pfatal_with_name ("fdopen");
858 check_color_output (true);
860 #else
861 char *command = system_quote_argv (SCI_SYSTEM, (char **) argv);
862 errno = 0;
863 outfile = popen (command, "w");
864 if (!outfile)
865 pfatal_with_name (command);
866 check_color_output (true);
867 free (command);
868 #endif
870 else
873 /* If -l was not specified, output the diff straight to 'stdout'. */
875 outfile = stdout;
876 check_color_output (false);
878 /* If handling multiple files (because scanning a directory),
879 print which files the following output is about. */
880 if (currently_recursive)
881 puts (name);
884 free (name);
886 /* A special header is needed at the beginning of context output. */
887 if (output_style == OUTPUT_CONTEXT || output_style == OUTPUT_UNIFIED)
888 print_context_header (curr.file, names,
889 output_style == OUTPUT_UNIFIED);
892 /* Call after the end of output of diffs for one file.
893 Close OUTFILE and get rid of the 'pr' subfork. */
895 void
896 finish_output (void)
898 if (outfile && outfile != stdout)
900 int wstatus;
901 int werrno = 0;
902 if (ferror (outfile))
903 fatal ("write failed");
904 #if ! HAVE_WORKING_FORK
905 wstatus = pclose (outfile);
906 if (wstatus == -1)
907 werrno = errno;
908 #else
909 if (fclose (outfile) != 0)
910 pfatal_with_name (_("write failed"));
911 if (waitpid (pr_pid, &wstatus, 0) < 0)
912 pfatal_with_name ("waitpid");
913 #endif
914 int status = (! werrno && WIFEXITED (wstatus)
915 ? WEXITSTATUS (wstatus)
916 : INT_MAX);
917 if (status)
918 error (EXIT_TROUBLE, werrno,
919 _(status == 126
920 ? "subsidiary program %s could not be invoked"
921 : status == 127
922 ? "subsidiary program %s not found"
923 : status == INT_MAX
924 ? "subsidiary program %s failed"
925 : "subsidiary program %s failed (exit status %d)"),
926 quote (pr_program), status);
929 outfile = nullptr;
/* Hunk-finding function for print_script that treats every change as
   a hunk of its own: return SCRIPT, the first link, as the last link
   of the hunk that starts at SCRIPT.  */

struct change *
find_change (struct change *script)
{
  struct change *last_of_hunk = script;
  return last_of_hunk;
}
941 /* Divide SCRIPT into pieces by calling HUNKFUN and
942 print each piece with PRINTFUN.
943 Both functions take one arg, an edit script.
945 HUNKFUN is called with the tail of the script
946 and returns the last link that belongs together with the start
947 of the tail.
949 PRINTFUN takes a subscript which belongs together (with a null
950 link at the end) and prints it. */
952 void
953 print_script (struct change *script,
954 struct change * (*hunkfun) (struct change *),
955 void (*printfun) (struct change *))
957 struct change *next = script;
959 while (next)
961 /* Find a set of changes that belong together. */
962 struct change *this = next;
963 struct change *end = (*hunkfun) (next);
965 /* Disconnect them from the rest of the changes,
966 making them a hunk, and remember the rest for next iteration. */
967 next = end->link;
968 end->link = nullptr;
969 #ifdef DEBUG
970 debug_script (this);
971 #endif
973 /* Print this hunk. */
974 (*printfun) (this);
976 /* Reconnect the script so it will all be freed properly. */
977 end->link = next;
/* Print the text of a single line LINE, including its final newline,
   flagging it with the characters in LINE_FLAG (which say whether
   the line is inserted, deleted, changed, etc.).  LINE_FLAG must not
   end in a blank, unless it is a single blank.  */

void
print_1_line (char const *line_flag, char const *const *line)
{
  bool const skip_nl = false;
  print_1_line_nl (line_flag, line, skip_nl);
}
992 /* Print the text of a single line LINE,
993 flagging it with the characters in LINE_FLAG (which say whether
994 the line is inserted, deleted, changed, etc.). LINE_FLAG must not
995 end in a blank, unless it is a single blank. If SKIP_NL is set, then
996 the final '\n' is not printed. */
998 void
999 print_1_line_nl (char const *line_flag, char const *const *line, bool skip_nl)
1001 char const *base = line[0], *limit = line[1]; /* Help the compiler. */
1002 FILE *out = outfile; /* Help the compiler some more. */
1003 char const *flag_format = nullptr;
1005 /* If -T was specified, use a Tab between the line-flag and the text.
1006 Otherwise use a Space (as Unix diff does).
1007 Print neither space nor tab if line-flags are empty.
1008 But omit trailing blanks if requested. */
1010 if (line_flag && *line_flag)
1012 char const *flag_format_1 = flag_format = initial_tab ? "%s\t" : "%s ";
1013 char const *line_flag_1 = line_flag;
1015 if (suppress_blank_empty && **line == '\n')
1017 flag_format_1 = "%s";
1019 /* This hack to omit trailing blanks takes advantage of the
1020 fact that the only way that LINE_FLAG can end in a blank
1021 is when LINE_FLAG consists of a single blank. */
1022 line_flag_1 += *line_flag_1 == ' ';
1025 fprintf (out, flag_format_1, line_flag_1);
1028 output_1_line (base, limit - (skip_nl && limit[-1] == '\n'), flag_format, line_flag);
1030 if ((!line_flag || line_flag[0]) && limit[-1] != '\n')
1032 set_color_context (RESET_CONTEXT);
1033 fprintf (out, "\n\\ %s\n", _("No newline at end of file"));
1037 /* Output a line from BASE up to LIMIT.
1038 With -t, expand white space characters to spaces, and if FLAG_FORMAT
1039 is nonzero, output it with argument LINE_FLAG after every
1040 internal carriage return, so that tab stops continue to line up. */
1042 void
1043 output_1_line (char const *base, char const *limit, char const *flag_format,
1044 char const *line_flag)
1046 enum { MAX_CHUNK = 1024 };
1047 if (!expand_tabs)
1049 idx_t left = limit - base;
1050 while (left)
1052 idx_t to_write = MIN (left, MAX_CHUNK);
1053 idx_t written = fwrite (base, sizeof (char), to_write, outfile);
1054 process_signals ();
1055 if (written < to_write)
1056 return;
1057 base += written;
1058 left -= written;
1061 else
1063 FILE *out = outfile;
1064 char const *t = base;
1065 intmax_t tab = 0, column = 0, tab_size = tabsize;
1066 int counter_proc_signals = 0;
1068 while (t < limit)
1070 counter_proc_signals++;
1071 if (counter_proc_signals == MAX_CHUNK)
1073 process_signals ();
1074 counter_proc_signals = 0;
1077 switch (*t)
1079 case '\t':
1080 t++;
1082 if (putc (' ', out) < 0)
1083 return;
1084 while (++column < tab_size);
1086 tab++;
1087 column = 0;
1088 break;
1090 case '\r':
1091 t++;
1092 if (putc ('\r', out) < 0)
1093 return;
1094 if (flag_format && t < limit && *t != '\n')
1095 if (fprintf (out, flag_format, line_flag) < 0)
1096 return;
1097 tab = column = 0;
1098 break;
1100 case '\b':
1101 t++;
1102 if (0 < column)
1103 column--;
1104 else if (0 < tab)
1106 tab--;
1107 column = tab_size - 1;
1109 else
1110 continue;
1111 if (putc ('\b', out) < 0)
1112 return;
1113 break;
1115 default:;
1116 mcel_t g = mcel_scan (t, limit);
1117 column += g.err ? 1 : c32isprint (g.ch) ? c32width (g.ch) : 0;
1118 tab += column / tab_size;
1119 column %= tab_size;
1120 if (fwrite (t, sizeof *t, g.len, outfile) != g.len)
1121 return;
1122 t += g.len;
1123 break;
/* Indexes into color_indicator, one name per entry.  */
enum indicator_no
  {
    C_LEFT,
    C_RIGHT,
    C_END,
    C_RESET,
    C_HEADER,
    C_ADD,
    C_DELETE,
    C_LINE
  };
1134 static void
1135 put_indicator (const struct bin_str *ind)
1137 fwrite (ind->string, ind->len, 1, outfile);
1140 static enum color_context last_context = RESET_CONTEXT;
1142 void
1143 set_color_context (enum color_context color_context)
1145 if (color_context != RESET_CONTEXT)
1146 process_signals ();
1147 if (colors_enabled && last_context != color_context)
1149 put_indicator (&color_indicator[C_LEFT]);
1150 switch (color_context)
1152 case HEADER_CONTEXT:
1153 put_indicator (&color_indicator[C_HEADER]);
1154 break;
1156 case LINE_NUMBER_CONTEXT:
1157 put_indicator (&color_indicator[C_LINE]);
1158 break;
1160 case ADD_CONTEXT:
1161 put_indicator (&color_indicator[C_ADD]);
1162 break;
1164 case DELETE_CONTEXT:
1165 put_indicator (&color_indicator[C_DELETE]);
1166 break;
1168 case RESET_CONTEXT:
1169 put_indicator (&color_indicator[C_RESET]);
1170 break;
1172 default:
1173 unreachable ();
1175 put_indicator (&color_indicator[C_RIGHT]);
1176 last_context = color_context;
/* The letter for each kind of change: none, 'd', 'a' and 'c'.
   NOTE(review): presumably indexed by 'enum changes' values
   (UNCHANGED, OLD, NEW, CHANGED) -- confirm against the callers.  */
char const change_letter[] =
  {
    '\0',
    'd',
    'a',
    'c'
  };
1183 /* Translate an internal line number (an index into diff's table of lines)
1184 into an actual line number in the input file.
1185 The internal line number is I. FILE points to the data on the file.
1187 Internal line numbers count from 0 starting after the prefix.
1188 Actual line numbers count from 1 within the entire file. */
1191 translate_line_number (struct file_data const *file, lin i)
1193 return i + file->prefix_lines + 1;
1196 /* Translate a line number range. */
1198 void
1199 translate_range (struct file_data const *file,
1200 lin a, lin b,
1201 lin *aptr, lin *bptr)
1203 *aptr = translate_line_number (file, a - 1) + 1;
1204 *bptr = translate_line_number (file, b + 1) - 1;
1207 /* Print a pair of line numbers with SEPCHAR, translated for file FILE.
1208 If the two numbers are identical, print just one number.
1210 Args A and B are internal line numbers.
1211 We print the translated (real) line numbers. */
1213 void
1214 print_number_range (char sepchar, struct file_data *file, lin a, lin b)
1216 lin trans_a, trans_b;
1217 translate_range (file, a, b, &trans_a, &trans_b);
1219 /* Note: we can have B < A in the case of a range of no lines.
1220 In this case, we should print the line number before the range,
1221 which is B. */
1222 if (trans_b > trans_a)
1223 fprintf (outfile, "%"pI"d%c%"pI"d", trans_a, sepchar, trans_b);
1224 else
1225 fprintf (outfile, "%"pI"d", trans_b);
1228 /* Look at a hunk of edit script and report the range of lines in each file
1229 that it applies to. HUNK is the start of the hunk, which is a chain
1230 of 'struct change'. The first and last line numbers of file 0 are stored in
1231 *FIRST0 and *LAST0, and likewise for file 1 in *FIRST1 and *LAST1.
1232 Note that these are internal line numbers that count from 0.
1234 If no lines from file 0 are deleted, then FIRST0 is LAST0+1.
1236 Return UNCHANGED if only ignorable lines are inserted or deleted,
1237 OLD if lines of file 0 are deleted,
1238 NEW if lines of file 1 are inserted,
1239 and CHANGED if both kinds of changes are found. */
1241 enum changes
1242 analyze_hunk (struct change *hunk,
1243 lin *first0, lin *last0,
1244 lin *first1, lin *last1)
1246 bool trivial = ignore_blank_lines || ignore_regexp.fastmap;
1247 int trivial_length = ignore_blank_lines - 1;
1248 /* If 0, ignore zero-length lines;
1249 if -1, do not ignore lines just because of their length. */
1251 bool skip_white_space =
1252 ignore_blank_lines && IGNORE_TRAILING_SPACE <= ignore_white_space;
1253 bool skip_leading_white_space =
1254 skip_white_space && IGNORE_SPACE_CHANGE <= ignore_white_space;
1256 /* Help the compiler. */
1257 char const *const *linbuf0 = curr.file[0].linbuf;
1258 char const *const *linbuf1 = curr.file[1].linbuf;
1260 lin show_from = 0, show_to = 0;
1262 *first0 = hunk->line0;
1263 *first1 = hunk->line1;
1265 struct change *next = hunk;
1266 lin l0, l1;
1269 l0 = next->line0 + next->deleted - 1;
1270 l1 = next->line1 + next->inserted - 1;
1271 show_from += next->deleted;
1272 show_to += next->inserted;
1274 for (lin i = next->line0; i <= l0 && trivial; i++)
1276 char const *line = linbuf0[i];
1277 char const *lastbyte = linbuf0[i + 1] - 1;
1278 char const *newline = lastbyte + (*lastbyte != '\n');
1279 idx_t len = newline - line;
1280 char const *p = line;
1281 if (skip_white_space)
1282 while (*p != '\n')
1284 mcel_t g = mcel_scan (p, newline);
1285 if (! c32isspace (g.ch))
1287 if (! skip_leading_white_space)
1288 p = line;
1289 break;
1291 p += g.len;
1293 if (newline - p != trivial_length
1294 && (! ignore_regexp.fastmap
1295 || (re_search (&ignore_regexp, line, len, 0, len, nullptr)
1296 < 0)))
1297 trivial = false;
1300 for (lin i = next->line1; i <= l1 && trivial; i++)
1302 char const *line = linbuf1[i];
1303 char const *lastbyte = linbuf1[i + 1] - 1;
1304 char const *newline = lastbyte + (*lastbyte != '\n');
1305 idx_t len = newline - line;
1306 char const *p = line;
1307 if (skip_white_space)
1308 while (*p != '\n')
1310 mcel_t g = mcel_scan (p, newline);
1311 if (! c32isspace (g.ch))
1313 if (! skip_leading_white_space)
1314 p = line;
1315 break;
1317 p += g.len;
1319 if (newline - p != trivial_length
1320 && (! ignore_regexp.fastmap
1321 || (re_search (&ignore_regexp, line, len, 0, len, nullptr)
1322 < 0)))
1323 trivial = false;
1326 while ((next = next->link));
1328 *last0 = l0;
1329 *last1 = l1;
1331 /* If all inserted or deleted lines are ignorable,
1332 tell the caller to ignore this hunk. */
1334 if (trivial)
1335 return UNCHANGED;
1337 return (show_from ? OLD : UNCHANGED) | (show_to ? NEW : UNCHANGED);
#ifdef DEBUG
/* Print each change of the edit script SP on stderr: its line
   numbers in the two files, then how many lines it deletes and
   inserts.  */
void
debug_script (struct change *sp)
{
  /* Flush stdout first so that the two streams do not interleave.  */
  fflush (stdout);

  struct change *c = sp;
  while (c)
    {
      fprintf (stderr, "%3"pI"d %3"pI"d delete %"pI"d insert %"pI"d\n",
               c->line0, c->line1, c->deleted, c->inserted);
      c = c->link;
    }

  fflush (stderr);
}
#endif