5 # The contents of this file are subject to the terms of the
6 # Common Development and Distribution License (the "License").
7 # You may not use this file except in compliance with the License.
9 # You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
10 # or http://www.opensolaris.org/os/licensing.
11 # See the License for the specific language governing permissions
12 # and limitations under the License.
14 # When distributing Covered Code, include this CDDL HEADER in each
15 # file and include the License file at usr/src/OPENSOLARIS.LICENSE.
16 # If applicable, add the following below this CDDL HEADER, with the
17 # fields enclosed by brackets "[]" replaced with your own identifying
18 # information: Portions Copyright [yyyy] [name of copyright owner]
24 # Copyright 2010 Sun Microsystems, Inc. All rights reserved.
25 # Use is subject to license terms.
29 # Compare the content generated by a build to a set of manifests
30 # describing how that content is to be delivered.
39 from pkg
import actions
40 from pkg
import manifest
44 # Dictionary used to map action names to output format. Each entry is
45 # indexed by action name, and consists of a list of tuples that map
46 # FileInfo class members to output labels.
63 ("mediator", "mediator="),
69 ("hardkey", "target=")
# Single-letter identifiers for the permission checks performed by
# checkmodes():
#   m - object writable by group or other
#   w - non-root-owned executable that is also writable by its owner
#   s - setuid/setgid file readable by group or other
#   o - owner that could safely be changed to root
#
# ALLMODECHECKS is the set of letters accepted as a mode check name;
# DEFAULTMODECHECKS is the subset used to seed the active checks.
ALLMODECHECKS = frozenset("mwso")
DEFAULTMODECHECKS = frozenset("mwo")
77 class FileInfo(object):
78 """Base class to represent a file.
80 Subclassed according to whether the file represents an actual filesystem
81 object (RealFileInfo) or an IPS manifest action (ActionInfo).
92 self
.hardpaths
= set()
96 """Return the IPS action name of a FileInfo object.
109 def checkmodes(self
, modechecks
):
110 """Check for and report on unsafe permissions.
112 Returns a potentially empty list of warning strings.
117 if t
in ("link", "hardlink"):
119 m
= int(self
.mode
, 8)
123 if "s" in modechecks
and t
== "file":
124 if m
& (stat
.S_ISUID | stat
.S_ISGID
):
125 if m
& (stat
.S_IRGRP | stat
.S_IROTH
):
126 w
.extend(["%s: 0%o: setuid/setgid file should not be " \
127 "readable by group or other" % (p
, m
)])
129 if "o" in modechecks
and o
!= "root" and ((m
& stat
.S_ISUID
) == 0):
130 mu
= (m
& stat
.S_IRWXU
) >> 6
131 mg
= (m
& stat
.S_IRWXG
) >> 3
132 mo
= m
& stat
.S_IRWXO
135 if (((mu
& 02) == 0 and (mo
& mg
& 04) == 04) or
136 (t
== "file" and mo
& 01 == 1) or
137 (mg
, mo
) == (mu
, mu
) or
138 ((t
== "file" and not e
or t
== "dir" and o
== "bin") and
139 (mg
& 05 == mo
& 05)) or
140 (t
== "file" and o
== "bin" and mu
& 01 == 01) or
141 (m
& 0105 != 0 and p
.startswith("etc/security/dev/"))):
142 w
.extend(["%s: owner \"%s\" may be safely " \
143 "changed to \"root\"" % (p
, o
)])
145 if "w" in modechecks
and t
== "file" and o
!= "root":
146 uwx
= stat
.S_IWUSR | stat
.S_IXUSR
148 w
.extend(["%s: non-root-owned executable should not " \
149 "also be writable by owner." % p
])
151 if ("m" in modechecks
and
152 m
& (stat
.S_IWGRP | stat
.S_IWOTH
) != 0 and
153 m
& stat
.S_ISVTX
== 0):
154 w
.extend(["%s: 0%o: should not be writable by group or other" %
159 def __ne__(self
, other
):
160 """Compare two FileInfo objects.
162 Note this is the "not equal" comparison, so a return value of False
163 indicates that the objects are functionally equivalent.
166 # Map the objects such that the lhs is always the ActionInfo,
167 # and the rhs is always the RealFileInfo.
# It's only really important that the rhs not be an
# ActionInfo; if we're comparing a FileInfo to a RealFileInfo, it
# won't actually matter which one we choose.
173 if isinstance(self
, ActionInfo
):
181 # Because the manifest may legitimately translate a relative
182 # path from the proto area into a different path on the installed
183 # system, we don't compare paths here. We only expect this comparison
184 # to be invoked on items with identical relative paths in
189 # All comparisons depend on type. For symlink and directory, they
190 # must be the same. For file and hardlink, see below.
194 if typelhs
in ("link", "dir"):
195 if typelhs
!= typerhs
:
199 # For symlinks, all that's left is the link target.
200 # For mediated symlinks targets can differ.
202 if typelhs
== "link":
203 return (lhs
.mediator
is None) and (lhs
.target
!= rhs
.target
)
206 # For a directory, it's important that both be directories,
207 # the modes be identical, and the paths are identical. We already
208 # checked all but the modes above.
210 # If both objects are files, then we're in the same boat.
212 if typelhs
== "dir" or (typelhs
== "file" and typerhs
== "file"):
213 return lhs
.mode
!= rhs
.mode
216 # For files or hardlinks:
218 # Since the key space is different (inodes for real files and
219 # actual link targets for hard links), and since the proto area will
220 # identify all N occurrences as hardlinks, but the manifests as one
221 # file and N-1 hardlinks, we have to compare files to hardlinks.
225 # If they're both hardlinks, we just make sure that
226 # the same target path appears in both sets of
229 if typelhs
== "hardlink" and typerhs
== "hardlink":
230 return len(lhs
.hardpaths
.intersection(rhs
.hardpaths
)) == 0
233 # Otherwise, we have a mix of file and hardlink, so we
234 # need to make sure that the file path appears in the
235 # set of possible target paths for the hardlink.
# We already know that the ActionInfo, if present, is the lhs
# operand.  So it's the rhs operand that's guaranteed to
# have a set of hardpaths.
241 return lhs
.path
not in rhs
.hardpaths
244 """Return an action-style representation of a FileInfo object.
246 We don't currently quote items with embedded spaces. If we
247 ever decide to parse this output, we'll want to revisit that.
252 for member
, label
in OUTPUTMAP
[name
]:
253 out
+= " " + label
+ str(getattr(self
, member
))
258 """Return a protolist-style representation of a FileInfo object.
271 elif name
in ("file", "hardlink"):
280 out
= "%c %-30s %-20s %4s %-5s %-5s %6d %2ld - -" % \
281 (ftype
, self
.path
, target
, mode
, owner
, group
, 0, 1)
286 class ActionInfo(FileInfo
):
287 """Object to track information about manifest actions.
289 This currently understands file, link, dir, and hardlink actions.
292 def __init__(self
, action
):
293 FileInfo
.__init
__(self
)
295 # Currently, all actions that we support have a "path"
296 # attribute. If that changes, then we'll need to
297 # catch a KeyError from this assignment.
299 self
.path
= action
.attrs
["path"]
301 if action
.name
== "file":
302 self
.owner
= action
.attrs
["owner"]
303 self
.group
= action
.attrs
["group"]
304 self
.mode
= action
.attrs
["mode"]
305 self
.hash = action
.hash
306 if "preserve" in action
.attrs
:
308 elif action
.name
== "link":
309 target
= action
.attrs
["target"]
310 self
.target
= os
.path
.normpath(target
)
311 self
.mediator
= action
.attrs
.get("mediator")
312 elif action
.name
== "dir":
313 self
.owner
= action
.attrs
["owner"]
314 self
.group
= action
.attrs
["group"]
315 self
.mode
= action
.attrs
["mode"]
317 elif action
.name
== "hardlink":
318 target
= os
.path
.normpath(action
.get_target_path())
319 self
.hardkey
= target
320 self
.hardpaths
.add(target
)
def supported(action):
    """Indicate whether the specified IPS action type is correctly
    handled by the ActionInfo constructor.

    'action' is an action name such as "file" or "dir".  Returns True
    for file, dir, link, and hardlink actions, and False otherwise.
    """
    handled = frozenset(("file", "dir", "link", "hardlink"))
    return action in handled
330 class UnsupportedFileFormatError(Exception):
331 """This means that the stat.S_IFMT returned something we don't
332 support, ie a pipe or socket. If it's appropriate for such an
333 object to be in the proto area, then the RealFileInfo constructor
334 will need to evolve to support it, or it will need to be in the
337 def __init__(self
, path
, mode
):
338 Exception.__init
__(self
)
343 return '%s: unsupported S_IFMT %07o' % (self
.path
, self
.mode
)
346 class RealFileInfo(FileInfo
):
347 """Object to track important-to-packaging file information.
349 This currently handles regular files, directories, and symbolic links.
351 For multiple RealFileInfo objects with identical hardkeys, there
352 is no way to determine which of the hard links should be
353 delivered as a file, and which as hardlinks.
356 def __init__(self
, root
=None, path
=None):
357 FileInfo
.__init
__(self
)
359 path
= os
.path
.join(root
, path
)
360 lstat
= os
.lstat(path
)
364 # Per stat.py, these cases are mutually exclusive.
366 if stat
.S_ISREG(mode
):
367 self
.hash = self
.path
368 elif stat
.S_ISDIR(mode
):
370 elif stat
.S_ISLNK(mode
):
371 self
.target
= os
.path
.normpath(os
.readlink(path
))
374 raise UnsupportedFileFormatError(path
, mode
)
376 if not stat
.S_ISLNK(mode
):
377 self
.mode
= "%04o" % stat
.S_IMODE(mode
)
379 # Instead of reading the group and owner from the proto area after
380 # a non-root build, just drop in dummy values. Since we don't
381 # compare them anywhere, this should allow at least marginally
382 # useful comparisons of protolist-style output.
388 # refcount > 1 indicates a hard link
390 if lstat
.st_nlink
> 1:
392 # This could get ugly if multiple proto areas reside
393 # on different filesystems.
395 self
.hardkey
= lstat
.st_ino
398 class DirectoryTree(dict):
399 """Meant to be subclassed according to population method.
401 def __init__(self
, name
):
405 def compare(self
, other
):
406 """Compare two different sets of FileInfo objects.
408 keys1
= frozenset(self
.keys())
409 keys2
= frozenset(other
.keys())
411 common
= keys1
.intersection(keys2
)
412 onlykeys1
= keys1
.difference(common
)
413 onlykeys2
= keys2
.difference(common
)
416 print "Entries present in %s but not %s:" % \
417 (self
.name
, other
.name
)
418 for path
in sorted(onlykeys1
):
419 print("\t%s" % str(self
[path
]))
423 print "Entries present in %s but not %s:" % \
424 (other
.name
, self
.name
)
425 for path
in sorted(onlykeys2
):
426 print("\t%s" % str(other
[path
]))
430 for path
in sorted(common
):
431 if self
[path
] != other
[path
]:
433 nodifferences
= False
434 print "Entries that differ between %s and %s:" \
435 % (self
.name
, other
.name
)
436 print("%14s %s" % (self
.name
, self
[path
]))
437 print("%14s %s" % (other
.name
, other
[path
]))
438 if not nodifferences
:
442 class BadProtolistFormat(Exception):
443 """This means that the user supplied a file via -l, but at least
444 one line from that file doesn't have the right number of fields to
445 parse as protolist output.
448 return 'bad proto list entry: "%s"' % Exception.__str
__(self
)
451 class ProtoTree(DirectoryTree
):
452 """Describes one or more proto directories as a dictionary of
453 RealFileInfo objects, indexed by relative path.
456 def adddir(self
, proto
, exceptions
):
457 """Extends the ProtoTree dictionary with RealFileInfo
458 objects describing the proto dir, indexed by relative
463 pdir
= os
.path
.normpath(proto
)
464 strippdir
= lambda r
, n
: os
.path
.join(r
, n
)[len(pdir
)+1:]
465 for root
, dirs
, files
in os
.walk(pdir
):
466 for name
in dirs
+ files
:
467 path
= strippdir(root
, name
)
468 if path
not in exceptions
:
470 newentries
[path
] = RealFileInfo(pdir
, path
)
472 sys
.stderr
.write("Warning: unable to stat %s: %s\n" %
476 exceptions
.remove(path
)
481 # Find the sets of paths in this proto dir that are hardlinks
484 # It seems wasteful to store this in each FileInfo, but we
485 # otherwise need a linking mechanism. With this information
486 # here, FileInfo object comparison can be self contained.
488 # We limit this aggregation to a single proto dir, as
489 # represented by newentries. That means we don't need to care
490 # about proto dirs on separate filesystems, or about hardlinks
491 # that cross proto dir boundaries.
494 for path
, fileinfo
in newentries
.iteritems():
496 hk2path
.setdefault(fileinfo
.hardkey
, set()).add(path
)
497 for fileinfo
in newentries
.itervalues():
499 fileinfo
.hardpaths
.update(hk2path
[fileinfo
.hardkey
])
500 self
.update(newentries
)
502 def addprotolist(self
, protolist
, exceptions
):
503 """Read in the specified file, assumed to be the
506 This has been tested minimally, and is potentially useful for
507 comparing across the transition period, but should ultimately
512 plist
= open(protolist
)
514 raise IOError("cannot open proto list: %s" % str(exc
))
519 pline
= pline
.split()
521 # Use a FileInfo() object instead of a RealFileInfo()
522 # object because we want to avoid the RealFileInfo
523 # constructor, because there's nothing to actually stat().
525 fileinfo
= FileInfo()
527 if pline
[1] in exceptions
:
528 exceptions
.remove(pline
[1])
531 fileinfo
.isdir
= True
532 fileinfo
.path
= pline
[1]
534 fileinfo
.target
= os
.path
.normpath(pline
[2])
535 fileinfo
.mode
= int("0%s" % pline
[3])
536 fileinfo
.owner
= pline
[4]
537 fileinfo
.group
= pline
[5]
539 fileinfo
.hardkey
= pline
[6]
540 newentries
[pline
[1]] = fileinfo
542 raise BadProtolistFormat(pline
)
546 for path
, fileinfo
in newentries
.iteritems():
548 hk2path
.setdefault(fileinfo
.hardkey
, set()).add(path
)
549 for fileinfo
in newentries
.itervalues():
551 fileinfo
.hardpaths
.update(hk2path
[fileinfo
.hardkey
])
552 self
.update(newentries
)
555 class ManifestParsingError(Exception):
556 """This means that the Manifest.set_content() raised an
557 ActionError. We raise this, instead, to tell us which manifest
558 could not be parsed, rather than what action error we hit.
560 def __init__(self
, mfile
, error
):
561 Exception.__init
__(self
)
566 return "unable to parse manifest %s: %s" % (self
.mfile
, self
.error
)
569 class ManifestTree(DirectoryTree
):
570 """Describes one or more directories containing arbitrarily
571 many manifests as a dictionary of ActionInfo objects, indexed
572 by the relative path of the data source within the proto area.
573 That path may or may not be the same as the path attribute of the
577 def addmanifest(self
, root
, mfile
, arch
, modechecks
, exceptions
):
578 """Treats the specified input file as a pkg(5) package
579 manifest, and extends the ManifestTree dictionary with entries
580 for the actions therein.
582 mfest
= manifest
.Manifest()
584 mfest
.set_content(open(os
.path
.join(root
, mfile
)).read())
586 raise IOError("cannot read manifest: %s" % str(exc
))
587 except actions
.ActionError
, exc
:
588 raise ManifestParsingError(mfile
, str(exc
))
591 # Make sure the manifest is applicable to the user-specified
592 # architecture. Assumption: if variant.arch is not an
593 # attribute of the manifest, then the package should be
594 # installed on all architectures.
596 if arch
not in mfest
.attributes
.get("variant.arch", (arch
,)):
600 for action
in mfest
.gen_actions():
601 if "path" not in action
.attrs
or \
602 not ActionInfo
.supported(action
.name
):
606 # The dir action is currently fully specified, in that it
607 # lists owner, group, and mode attributes. If that
608 # changes in pkg(5) code, we'll need to revisit either this
609 # code or the ActionInfo() constructor. It's possible
610 # that the pkg(5) system could be extended to provide a
611 # mechanism for specifying directory permissions outside
612 # of the individual manifests that deliver files into
613 # those directories. Doing so at time of manifest
614 # processing would mean that validate_pkg continues to work,
615 # but doing so at time of publication would require updates.
619 # See pkgsend(1) for the use of NOHASH for objects with
620 # datastreams. Currently, that means "files," but this
621 # should work for any other such actions.
623 if getattr(action
, "hash", "NOHASH") != "NOHASH":
626 path
= action
.attrs
["path"]
629 # This is the wrong tool in which to enforce consistency
630 # on a set of manifests. So instead of comparing the
631 # different actions with the same "path" attribute, we
638 # As with the manifest itself, if an action has specified
639 # variant.arch, we look for the target architecture
645 # The name of this method changed in pkg(5) build 150, we need to
646 # work with both sets.
648 if hasattr(action
, 'get_variants'):
649 var
= action
.get_variants()
651 var
= action
.get_variant_template()
652 if "variant.arch" in var
and arch
not in var
["variant.arch"]:
655 self
[path
] = ActionInfo(action
)
656 if modechecks
is not None and path
not in exceptions
:
657 modewarnings
.update(self
[path
].checkmodes(modechecks
))
659 if len(modewarnings
) > 0:
660 print "warning: unsafe permissions in %s" % mfile
661 for w
in sorted(modewarnings
):
665 def adddir(self
, mdir
, arch
, modechecks
, exceptions
):
666 """Walks the specified directory looking for pkg(5) manifests.
668 for mfile
in os
.listdir(mdir
):
669 if (mfile
.endswith(".mog") and
670 stat
.S_ISREG(os
.lstat(os
.path
.join(mdir
, mfile
)).st_mode
)):
672 self
.addmanifest(mdir
, mfile
, arch
, modechecks
, exceptions
)
674 sys
.stderr
.write("warning: %s\n" % str(exc
))
676 def resolvehardlinks(self
):
677 """Populates mode, group, and owner for resolved (ie link target
678 is present in the manifest tree) hard links.
680 for info
in self
.values():
681 if info
.name() == "hardlink":
685 info
.owner
= tgtinfo
.owner
686 info
.group
= tgtinfo
.group
687 info
.mode
= tgtinfo
.mode
689 class ExceptionList(set):
690 """Keep track of an exception list as a set of paths to be excluded
691 from any other lists we build.
694 def __init__(self
, files
, arch
):
698 self
.readexceptionfile(fname
, arch
)
700 sys
.stderr
.write("warning: cannot read exception file: %s\n" %
703 def readexceptionfile(self
, efile
, arch
):
704 """Build a list of all pathnames from the specified file that
705 either apply to all architectures (ie which have no trailing
706 architecture tokens), or to the specified architecture (ie
707 which have the value of the arch arg as a trailing
711 excfile
= open(efile
)
715 if len(exc
) and exc
[0][0] != "#":
716 if arch
in (exc
[1:] or arch
):
717 self
.add(os
.path
.normpath(exc
[0]))
722 USAGE
= """%s [-v] -a arch [-e exceptionfile]... [-L|-M [-X check]...] input_1 [input_2]
724 where input_1 and input_2 may specify proto lists, proto areas,
725 or manifest directories. For proto lists, use one or more
729 arguments. For proto areas, use one or more
733 arguments. For manifest directories, use one or more
739 If -L or -M is specified, then only one input source is allowed, and
740 it should be one or more manifest directories. These two options are
743 The -L option is used to generate a proto list to stdout.
745 The -M option is used to check for safe file and directory modes.
746 By default, this causes all mode checks to be performed. Individual
747 mode checks may be turned off using "-X check," where "check" comes
748 from the following set of checks:
750 m check for group or other write permissions
751 w check for user write permissions on files and directories
753 s check for group/other read permission on executable files
754 that have setuid/setgid bit(s)
755 o check for files that could be safely owned by root
760 """Try to give the user useful information when they don't get the
761 command syntax right.
764 sys
.stderr
.write("%s: %s\n" % (sys
.argv
[0], msg
))
765 sys
.stderr
.write(USAGE
)
770 """Compares two out of three possible data sources: a proto list, a
771 set of proto areas, and a set of manifests.
774 opts
, args
= getopt
.getopt(argv
, 'a:e:Ll:Mm:p:vX:')
775 except getopt
.GetoptError
, exc
:
785 manifesttree
= ManifestTree("manifests")
787 prototree
= ProtoTree("proto area")
789 protolist
= ProtoTree("proto list")
791 togglemodechecks
= set()
796 for opt
, arg
in opts
:
799 usage("may only specify one architecture")
803 exceptionlists
.append(arg
)
807 comparing
.add("protolist")
808 protolists
.append(os
.path
.normpath(arg
))
810 modechecks
.update(DEFAULTMODECHECKS
)
812 comparing
.add("manifests")
813 manifestdirs
.append(os
.path
.normpath(arg
))
815 comparing
.add("proto area")
816 protodirs
.append(os
.path
.normpath(arg
))
820 togglemodechecks
.add(arg
)
822 if listonly
or len(modechecks
) > 0:
823 if len(comparing
) != 1 or "manifests" not in comparing
:
824 usage("-L and -M require one or more -m args, and no -l or -p")
825 if listonly
and len(modechecks
) > 0:
826 usage("-L and -M are mutually exclusive")
827 elif len(comparing
) != 2:
828 usage("must specify exactly two of -l, -m, and -p")
830 if len(togglemodechecks
) > 0 and len(modechecks
) == 0:
831 usage("-X requires -M")
833 for s
in togglemodechecks
:
834 if s
not in ALLMODECHECKS
:
835 usage("unknown mode check %s" % s
)
836 modechecks
.symmetric_difference_update((s
))
838 if len(modechecks
) == 0:
842 usage("must specify architecture")
844 exceptions
= ExceptionList(exceptionlists
, arch
)
845 originalexceptions
= exceptions
.copy()
847 if len(manifestdirs
) > 0:
848 for mdir
in manifestdirs
:
849 manifesttree
.adddir(mdir
, arch
, modechecks
, exceptions
)
851 manifesttree
.resolvehardlinks()
852 for info
in manifesttree
.values():
853 print "%s" % info
.protostr()
855 if modechecks
is not None:
857 trees
.append(manifesttree
)
859 if len(protodirs
) > 0:
860 for pdir
in protodirs
:
861 prototree
.adddir(pdir
, exceptions
)
862 trees
.append(prototree
)
864 if len(protolists
) > 0:
865 for plist
in protolists
:
867 protolist
.addprotolist(plist
, exceptions
)
869 sys
.stderr
.write("warning: %s\n" % str(exc
))
870 trees
.append(protolist
)
872 if verbose
and exceptions
:
873 print "Entries present in exception list but missing from proto area:"
874 for exc
in sorted(exceptions
):
878 usedexceptions
= originalexceptions
.difference(exceptions
)
879 harmfulexceptions
= usedexceptions
.intersection(manifesttree
)
880 if harmfulexceptions
:
881 print "Entries present in exception list but also in manifests:"
882 for exc
in sorted(harmfulexceptions
):
884 del manifesttree
[exc
]
887 trees
[0].compare(trees
[1])
889 if __name__
== '__main__':
892 except KeyboardInterrupt: