8354 sync regcomp(3C) with upstream (fix make catalog)
[unleashed/tickless.git] / usr / src / cmd / filesync / recon.c
blob3ba61b0f5cb302c596dd81d2796706177ccaed56
1 /*
2 * CDDL HEADER START
4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License, Version 1.0 only
6 * (the "License"). You may not use this file except in compliance
7 * with the License.
9 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
10 * or http://www.opensolaris.org/os/licensing.
11 * See the License for the specific language governing permissions
12 * and limitations under the License.
14 * When distributing Covered Code, include this CDDL HEADER in each
15 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
16 * If applicable, add the following below this CDDL HEADER, with the
17 * fields enclosed by brackets "[]" replaced with your own identifying
18 * information: Portions Copyright [yyyy] [name of copyright owner]
20 * CDDL HEADER END
23 * Copyright (c) 1995 Sun Microsystems, Inc. All Rights Reserved
25 * module:
26 * recon.c
28 * purpose:
29 * process the reconciliation list, figure out exactly what the
30 * changes were, and what we should do about them.
32 * contents:
33 * reconcile ... (top level) process the reconciliation list
34 * samedata .... (static) do two files have the same contents
35 * samestuff ... (static) do two files have the same ownership/protection
36 * samecompare . (static) actually read and compare the contents
37 * samelink .... (static) do two symlinks have the same contents
38 * truncated ... (static) was one of the two copies truncated
39 * older ....... (static) which copy is older
40 * newer ....... (static) which copy is newer
41 * full_name ... generate a full path name for a file
43 * notes:
44 * If you only study one routine in this whole program, reconcile
45 * is that routine. Everything else is just book keeping.
47 * things were put onto the reconciliation list because analyze
48 * thought that they might have changed ... but up until now
49 * nobody has figured out what the changes really were, or even
50 * if there really were any changes.
52 * queue_file has ordered the reconciliation list with directory
53 * creations first (depth ordered) and deletions last (inversely
54 * depth ordered). all other changes have been ordered by mod time.
56 #ident "%W% %E% SMI"
58 #include <stdio.h>
59 #include <unistd.h>
60 #include <stdlib.h>
61 #include <string.h>
62 #include <fcntl.h>
64 #include "filesync.h"
65 #include "database.h"
66 #include "messages.h"
67 #include "debug.h"
70 * local routines to figure out how the files really differ
72 static bool_t samedata(struct file *);
73 static bool_t samestuff(struct file *);
74 static bool_t samecompare(struct file *);
75 static bool_t truncated(struct file *);
76 static bool_t samelink();
77 static side_t newer(struct file *);
78 static side_t older(struct file *);
/*
 * globals
 *
 *	fully qualified names of the two copies of the file currently
 *	being reconciled.  reconcile() fills these in from full_name(),
 *	and samecompare()/samelink() open or read the files through them.
 */
char *srcname;		/* file we are emulating	*/
char *dstname;		/* file we are updating		*/
87 * routine:
88 * reconcile
90 * purpose:
91 * to perform the reconciliation action associated with a file
93 * parameters:
94 * file pointer
96 * returns:
97 * built up error mask
98 * updated statistics
100 * notes:
101 * The switch statement handles the obvious stuff.
102 * The TRUE side of the samedata test handles minor differences.
103 * The interesting stuff is in the FALSE side of the samedata test.
105 * The desparation heuristics (in the diffmask&CONTENTS test) are
106 * not rigorously correct ... but they always try do the right thing
107 * with data, and only lose mode/ownership changes in relatively
108 * pathological cases. But I claim that the benefits outweigh the
109 * risks, and most users will be pleased with the resulting decisions.
111 * Another trick is in the deletion cases of the switch. We
112 * normally won't allow an unlink that conflicts with data
113 * changes. If there are multiple links to the file, however,
114 * we can make the changes and do the deletion.
116 * The action routines do_{remove,rename,like,copy} handle all
117 * of their own statistics and status updating. This routine
118 * only has to handle its own reconciliation failures (when we
119 * can't decide what to do).
121 errmask_t
122 reconcile(struct file *fp)
123 { errmask_t errs = 0;
124 diffmask_t diffmask;
126 if (opt_debug & DBG_RECON)
127 fprintf(stderr, "RECO: %s flgs=%s, mtime=%08lx.%08lx\n",
128 fp->f_fullname,
129 showflags(fileflags, fp->f_flags),
130 fp->f_modtime, fp->f_modns);
133 * form the fully qualified names for both files
135 srcname = full_name(fp, OPT_SRC, OPT_SRC);
136 dstname = full_name(fp, OPT_DST, OPT_DST);
139 * because they are so expensive to read and so troublesome
140 * to set, we try to put off reading ACLs as long as possible.
141 * If we haven't read them yet, we must read them now (so that
142 * samestuff can compare them).
144 if (opt_acls == 0 && fp->f_info[ OPT_BASE ].f_numacls == 0) {
145 if (get_acls(srcname, &fp->f_info[ OPT_SRC ]))
146 fp->f_srcdiffs |= D_FACLS;
147 if (get_acls(dstname, &fp->f_info[ OPT_DST ]))
148 fp->f_dstdiffs |= D_FACLS;
152 * If a rename has been detected, we don't have to figure
153 * it out, since both the rename-to and rename-from files
154 * have already been designated. When we encounter a rename-to
155 * we should carry it out. When we encounter a rename-from
156 * we can ignore it, since it should be dealt with as a side
157 * effect of processing the rename-to.
159 if ((fp->f_srcdiffs|fp->f_dstdiffs) & D_RENAME_FROM)
160 return (0);
162 if ((fp->f_srcdiffs|fp->f_dstdiffs) & D_RENAME_TO) {
164 if (opt_verbose)
165 fprintf(stdout, gettext(V_renamed),
166 fp->f_previous->f_fullname, fp->f_name);
168 if (fp->f_srcdiffs & D_RENAME_TO) {
169 errs = do_rename(fp, OPT_DST);
170 fp->f_srcdiffs &= D_MTIME | D_SIZE;
171 } else if (fp->f_dstdiffs & D_RENAME_TO) {
172 errs = do_rename(fp, OPT_SRC);
173 fp->f_dstdiffs &= D_MTIME | D_SIZE;
176 if (errs != ERR_RESOLVABLE)
177 goto done;
180 * if any differences remain, then we may be dealing
181 * with contents changes in addition to a rename
183 if ((fp->f_srcdiffs | fp->f_dstdiffs) == 0)
184 goto done;
187 * fall through to reconcile the data changes
192 * pull of the easy cases (non-conflict creations & deletions)
194 switch (fp->f_flags & (F_WHEREFOUND)) {
195 case F_IN_BASELINE: /* only exists in baseline */
196 case 0: /* only exists in rules */
197 if (opt_verbose)
198 fprintf(stdout, gettext(V_nomore),
199 fp->f_fullname);
200 fp->f_flags |= F_REMOVE; /* fix baseline */
201 return (0);
203 case F_IN_BASELINE|F_IN_SOURCE: /* deleted from dest */
205 * the basic principle here is that we are willing
206 * to do the deletion if:
207 * no changes were made on the other side
208 * OR
209 * we have been told to force in this direction
211 * we do, however, make an exception for files that
212 * will still have other links. In this case, the
213 * (changed) data will still be accessable through
214 * another link and so we are willing to do the unlink
215 * inspite of conflicting changes (which may well
216 * have been introduced through another link.
218 * The jury is still out on this one
220 if (((fp->f_srcdiffs&D_IMPORTANT) == 0) ||
221 (opt_force == OPT_DST) ||
222 has_other_links(fp, OPT_SRC)) {
223 if (opt_verbose)
224 fprintf(stdout, gettext(V_deleted),
225 fp->f_fullname, "dst");
226 errs = do_remove(fp, OPT_SRC);
227 goto done;
230 /* a deletion combined with changes */
231 if (opt_verbose)
232 fprintf(stdout, gettext(V_delconf),
233 fp->f_fullname);
235 /* if we are to resolve in favor of source */
236 if (opt_force == OPT_SRC) {
237 errs = do_copy(fp, OPT_DST);
238 goto done;
241 fp->f_problem = gettext(PROB_del_change);
242 goto cant;
244 case F_IN_BASELINE|F_IN_DEST: /* deleted from src */
245 /* just like previous case, w/sides reversed */
246 if (((fp->f_dstdiffs&D_IMPORTANT) == 0) ||
247 (opt_force == OPT_SRC) ||
248 has_other_links(fp, OPT_DST)) {
249 if (opt_verbose)
250 fprintf(stdout, gettext(V_deleted),
251 fp->f_fullname, "src");
252 errs = do_remove(fp, OPT_DST);
253 goto done;
256 /* a deletion combined with changes */
257 if (opt_verbose)
258 fprintf(stdout, gettext(V_delconf),
259 fp->f_fullname);
261 /* if we are to resolve in favor of destination */
262 if (opt_force == OPT_DST) {
263 errs = do_copy(fp, OPT_SRC);
264 goto done;
267 fp->f_problem = gettext(PROB_del_change);
268 goto cant;
271 * if something new shows up, and for some reason we cannot
272 * propagate it to the other side, we should suppress the
273 * file from the baseline, so it will show up as a new
274 * creation next time too.
276 case F_IN_SOURCE: /* created in src */
277 if (opt_verbose)
278 fprintf(stdout, gettext(V_created),
279 fp->f_fullname, "src");
280 errs = do_copy(fp, OPT_DST);
281 goto done;
283 case F_IN_DEST: /* created in dest */
284 if (opt_verbose)
285 fprintf(stdout, gettext(V_created),
286 fp->f_fullname, "dst");
287 errs = do_copy(fp, OPT_SRC);
288 goto done;
290 case F_IN_SOURCE|F_IN_DEST: /* not in baseline */
292 * since we don't have a baseline, we cannot
293 * know which of the two copies should prevail
295 break;
297 case F_IN_BASELINE|F_IN_SOURCE|F_IN_DEST:
299 * we have a baseline where the two copies agreed,
300 * so maybe we can determine that only one of the
301 * two copies have changed ... but before we decide
302 * who should be the winner we should determine
303 * that the two copies are actually different.
305 break;
309 * if we have fallen out of the case statement, it is because
310 * we have discovered a non-obvious situation where potentially
311 * changed versions of the file exist on both sides.
313 * if the two copies turn out to be identical, this is simple
315 if (samedata(fp)) {
316 if (samestuff(fp)) {
317 /* files are identical, just update baseline */
318 if (opt_verbose)
319 fprintf(stdout, gettext(V_unchanged),
320 fp->f_fullname);
321 update_info(fp, OPT_SRC);
322 goto done;
323 } else {
325 * contents agree but ownership/protection does
326 * not agree, so we have to bring these into
327 * agreement. We can pick a winner if one
328 * side hasn't changed, or if the user has
329 * specified a force flag.
331 if (opt_verbose)
332 fprintf(stdout, gettext(V_modes),
333 fp->f_fullname);
335 if (((fp->f_srcdiffs & D_ADMIN) == 0) ||
336 (opt_force == OPT_DST)) {
337 errs = do_like(fp, OPT_SRC, TRUE);
338 goto done;
341 if (((fp->f_dstdiffs & D_ADMIN) == 0) ||
342 (opt_force == OPT_SRC)) {
343 errs = do_like(fp, OPT_DST, TRUE);
344 goto done;
347 /* falls down to cant */
348 } else {
350 * The two files have different contents, so we have
351 * a potential conflict here. If we know that only one
352 * side has changed, we go with that side.
354 if (fp->f_dstdiffs == 0 || fp->f_srcdiffs == 0) {
355 if (opt_verbose)
356 fprintf(stdout, gettext(V_changed),
357 fp->f_fullname);
358 errs = do_copy(fp, fp->f_srcdiffs ? OPT_DST : OPT_SRC);
359 goto done;
363 * Both sides have changed, so we have a real conflict.
365 if (opt_verbose)
366 fprintf(stdout,
367 gettext(truncated(fp) ?
368 V_trunconf : V_different),
369 fp->f_fullname);
372 * See if the user has given us explicit instructions
373 * on how to resolve conflicts. We may have been told
374 * to favor the older, the newer, the source, or the
375 * destination ... but the default is to leave the
376 * conflict unresolved.
378 if (opt_force == OPT_OLD) {
379 errs = do_copy(fp, newer(fp));
380 goto done;
383 if (opt_force == OPT_NEW) {
384 errs = do_copy(fp, older(fp));
385 goto done;
388 if (opt_force != 0) {
389 errs = do_copy(fp, (opt_force == OPT_SRC) ?
390 OPT_DST : OPT_SRC);
391 goto done;
396 * This is our last chance before giving up.
398 * We know that the files have different contents and
399 * that there were changes on both sides. The only way
400 * we can safely handle this is if there were pure contents
401 * changes on one side and pure ownership changes on the
402 * other side. In this case we can propagate the ownership
403 * one way and the contents the other way.
405 * We decide whether or not this is possible by ANDing
406 * together the changes on the two sides, and seeing
407 * if the changes were all orthogonal (none of the same
408 * things changed on both sides).
410 diffmask = fp->f_srcdiffs & fp->f_dstdiffs;
411 if ((diffmask & D_CONTENTS) == 0) {
413 * if ownership changes were only made on one side
414 * (presumably the side that didn't have data changes)
415 * we can handle them separately. In this case,
416 * ownership changes must be fixed first, because
417 * the subsequent do_copy will overwrite them.
419 if ((diffmask & D_ADMIN) == 0)
420 errs |= do_like(fp, (fp->f_srcdiffs&D_ADMIN) ?
421 OPT_DST : OPT_SRC,
422 TRUE);
425 * Now we can deal with the propagation of the data
426 * changes. Note that any ownership/protection
427 * changes (from the other side) that have not been
428 * propagated yet are about to be lost. The cases
429 * in which this might happen are all pathological
430 * and the consequences of losing the protection
431 * changes are (IMHO) minor when compared to the
432 * obviously correct data propagation.
434 errs |= do_copy(fp, (fp->f_srcdiffs&D_CONTENTS) ?
435 OPT_DST : OPT_SRC);
436 goto done;
440 * there are conflicting changes, nobody has told us how to
441 * resolve conflicts, and we cannot figure out how to merge
442 * the differences.
444 fp->f_problem = gettext(PROB_different);
447 cant:
449 * I'm not smart enough to resolve this conflict automatically,
450 * so I have no choice but to bounce it back to the user.
452 fp->f_flags |= F_CONFLICT;
453 fp->f_base->b_unresolved++;
454 errs |= ERR_UNRESOLVED;
456 done:
458 * if we have a conflict and the file is not in the baseline,
459 * then there was never any point at which the two copies were
460 * in agreement, and we want to preserve the conflict for future
461 * resolution.
463 if ((errs&ERR_UNRESOLVED) && (fp->f_flags & F_IN_BASELINE) == 0)
464 if (fp->f_files == 0)
466 * in most cases, this is most easily done by just
467 * excluding the file in question from the baseline
469 fp->f_flags |= F_REMOVE;
470 else
472 * but ... if the file in question is a directory
473 * with children, excluding it from the baseline
474 * would keep all of its children (even those with
475 * no conflicts) out of the baseline as well. In
476 * This case, it is better to tell a lie and to
477 * manufacture a point of imaginary agreement
478 * in the baseline ... but one that is absurd enough
479 * that we will still see conflicts each time we run.
481 * recording a type of directory, and everything
482 * else as zero should be absurd enough.
484 fp->f_info[ OPT_BASE ].f_type = S_IFDIR;
486 if (opt_debug & DBG_MISC)
487 fprintf(stderr, "MISC: %s ERRS=%s\n", fp->f_fullname,
488 showflags(errmap, errs));
490 return (errs);
494 * routine:
495 * newer
497 * purpose:
498 * determine which of two files is newer
500 * parameters:
501 * struct file
503 * returns:
504 * side_t (src/dest)
506 static side_t
507 newer(struct file *fp)
509 struct fileinfo *sp, *dp;
511 sp = &fp->f_info[OPT_SRC];
512 dp = &fp->f_info[OPT_DST];
514 if (sp->f_modtime > dp->f_modtime)
515 return (OPT_SRC);
517 if (sp->f_modtime < dp->f_modtime)
518 return (OPT_DST);
520 if (sp->f_modns >= dp->f_modns)
521 return (OPT_SRC);
523 return (OPT_DST);
527 * routine:
528 * older
530 * purpose:
531 * determine which of two files is older
533 * parameters:
534 * struct file
536 * returns:
537 * side_t (src/dest)
539 static side_t
540 older(struct file *fp)
542 struct fileinfo *sp, *dp;
544 sp = &fp->f_info[OPT_SRC];
545 dp = &fp->f_info[OPT_DST];
547 if (sp->f_modtime < dp->f_modtime)
548 return (OPT_SRC);
550 if (sp->f_modtime > dp->f_modtime)
551 return (OPT_DST);
553 if (sp->f_modns <= dp->f_modns)
554 return (OPT_SRC);
556 return (OPT_DST);
560 * routine:
561 * samedata
563 * purpose:
564 * determine whether or not two files contain the same data
566 * parameters:
567 * struct file
569 * returns:
570 * bool_t (true/false)
572 static bool_t
573 samedata(struct file *fp)
575 struct fileinfo *sp, *dp;
577 sp = &fp->f_info[OPT_SRC];
578 dp = &fp->f_info[OPT_DST];
580 /* cheap test: types are different */
581 if (sp->f_type != dp->f_type)
582 return (FALSE);
584 /* cheap test: directories have same contents */
585 if (sp->f_type == S_IFDIR)
586 return (TRUE);
588 /* special files are compared via their maj/min */
589 if ((sp->f_type == S_IFBLK) || (sp->f_type == S_IFCHR)) {
590 if (sp->f_rd_maj != dp->f_rd_maj)
591 return (FALSE);
592 if (sp->f_rd_min != dp->f_rd_min)
593 return (FALSE);
594 return (TRUE);
597 /* symlinks are the same if their contents are the same */
598 if (sp->f_type == S_IFLNK)
599 return (samelink());
601 /* cheap test: sizes are different */
602 if (fp->f_info[OPT_SRC].f_size != fp->f_info[OPT_DST].f_size)
603 return (FALSE);
605 /* expensive test: byte for byte comparison */
606 if (samecompare(fp) == 0)
607 return (FALSE);
609 return (TRUE);
613 * routine:
614 * samestuff
616 * purpose:
617 * determine whether or not two files have same owner/protection
619 * parameters:
620 * struct file
622 * returns:
623 * bool_t (true/false)
625 static bool_t
626 samestuff(struct file *fp)
627 { int same_mode, same_uid, same_gid, same_acl;
628 struct fileinfo *sp, *dp;
630 sp = &fp->f_info[OPT_SRC];
631 dp = &fp->f_info[OPT_DST];
633 same_mode = (sp->f_mode == dp->f_mode);
634 same_uid = (sp->f_uid == dp->f_uid);
635 same_gid = (sp->f_gid == dp->f_gid);
636 same_acl = cmp_acls(sp, dp);
638 /* if the are all the same, it is easy to tell the truth */
639 if (same_uid && same_gid && same_mode && same_acl)
640 return (TRUE);
642 /* note the nature of the conflict */
643 if (!same_uid || !same_gid || !same_acl)
644 fp->f_problem = gettext(PROB_ownership);
645 else
646 fp->f_problem = gettext(PROB_protection);
648 return (FALSE);
652 * routine:
653 * samecompare
655 * purpose:
656 * do a byte-for-byte comparison of two files
658 * parameters:
659 * struct file
661 * returns:
662 * bool_t (true/false)
664 static bool_t
665 samecompare(struct file *fp)
666 { int sfd, dfd;
667 int i, count;
668 char srcbuf[ COPY_BSIZE ], dstbuf[ COPY_BSIZE ];
669 bool_t same = TRUE;
672 sfd = open(srcname, 0);
673 if (sfd < 0)
674 return (FALSE);
676 dfd = open(dstname, 0);
677 if (dfd < 0) {
678 close(sfd);
679 return (FALSE);
682 for (
683 count = read(sfd, srcbuf, COPY_BSIZE);
684 count > 0;
685 count = read(sfd, srcbuf, COPY_BSIZE)) {
687 /* do a matching read */
688 if (read(dfd, dstbuf, COPY_BSIZE) != count) {
689 same = FALSE;
690 goto done;
693 /* do the comparison for this block */
694 for (i = 0; i < count; i++) {
695 if (srcbuf[i] != dstbuf[i]) {
696 same = FALSE;
697 goto done;
702 done:
703 if (opt_debug & DBG_ANAL)
704 fprintf(stderr, "ANAL: SAME=%d %s\n", same, fp->f_fullname);
706 close(sfd);
707 close(dfd);
708 return (same);
712 * routine:
713 * truncated
715 * purpose:
716 * to determine whether or not a file has been truncated
718 * parameters:
719 * pointer to file structure
721 * returns:
722 * true/false
724 static bool_t
725 truncated(struct file *fp)
727 /* either source or destination must now be zero length */
728 if (fp->f_info[OPT_SRC].f_size && fp->f_info[OPT_DST].f_size)
729 return (FALSE);
731 /* file must have originally had a non-zero length */
732 if (fp->f_info[OPT_BASE].f_size == 0)
733 return (FALSE);
735 /* file type must "normal" all around */
736 if (fp->f_info[OPT_BASE].f_type != S_IFREG)
737 return (FALSE);
738 if (fp->f_info[OPT_SRC].f_type != S_IFREG)
739 return (FALSE);
740 if (fp->f_info[OPT_DST].f_type != S_IFREG)
741 return (FALSE);
744 return (TRUE);
748 * routine:
749 * samelink
751 * purpose:
752 * to determine whether or not two symbolic links agree
754 * parameters:
755 * pointer to file structure
757 * returns:
758 * true/false
760 static bool_t
761 samelink()
762 { int i, srclen, dstlen;
763 char srcbuf[ MAX_PATH ], dstbuf[ MAX_PATH ];
766 /* read both copies of the link */
767 srclen = readlink(srcname, srcbuf, sizeof (srcbuf));
768 dstlen = readlink(dstname, dstbuf, sizeof (dstbuf));
770 /* if they aren't the same length, they disagree */
771 if (srclen < 0 || dstlen < 0 || srclen != dstlen)
772 return (FALSE);
774 /* look for differences in contents */
775 for (i = 0; i < srclen; i++)
776 if (srcbuf[i] != dstbuf[i])
777 return (FALSE);
779 return (TRUE);
783 * routine:
784 * full_name
786 * purpose:
787 * to figure out the fully qualified path name to a file on the
788 * reconciliation list.
790 * parameters:
791 * pointer to the file structure
792 * side indication for which base to use
793 * side indication for which buffer to use
795 * returns:
796 * pointer to a clobberable buffer
798 * notes:
799 * the zero'th buffer is used for renames and links, where
800 * we need the name of another file on the same side.
802 char *
803 full_name(struct file *fp, side_t srcdst, side_t whichbuf)
804 { static char *buffers[3];
805 static int buflen = 0;
806 char *p, *b;
807 int l;
809 /* see if the existing buffer is long enough */
810 b = (srcdst == OPT_SRC) ? fp->f_base->b_src_name
811 : fp->f_base->b_dst_name;
813 /* see if the allocated buffer is long enough */
814 l = strlen(b) + strlen(fp->f_fullname) + 2;
815 if (l > buflen) {
816 /* figure out the next "nice" size to use */
817 for (buflen = MAX_PATH; buflen < l; buflen += MAX_NAME);
819 /* reallocate all buffers to this size */
820 for (l = 0; l < 3; l++) {
821 buffers[l] = (char *) realloc(buffers[l], buflen);
822 if (buffers[l] == 0)
823 nomem("full name");
827 /* assemble the name in the buffer and reurn it */
828 p = buffers[whichbuf];
829 strcpy(p, b);
830 strcat(p, "/");
831 strcat(p, fp->f_fullname);
832 return (p);