2 # tree.py: tools for comparing directory trees
4 # Subversion is a tool for revision control.
5 # See http://subversion.tigris.org for more information.
7 # ====================================================================
8 # Copyright (c) 2001, 2006 CollabNet. All rights reserved.
10 # This software is licensed as described in the file COPYING, which
11 # you should have received as part of this distribution. The terms
12 # are also available at http://subversion.tigris.org/license-1.html.
13 # If newer versions of this license are posted there, you may use a
14 # newer version instead, at your option.
16 ######################################################################
22 import main
# the general svntest routines in this module.
23 from svntest
import Failure
27 # All tree exceptions should inherit from SVNTreeError
28 class SVNTreeError(Failure
):
29 "Exception raised if you screw up in the tree module."
32 class SVNTreeUnequal(SVNTreeError
):
33 "Exception raised if two trees are unequal."
36 class SVNTreeIsNotDirectory(SVNTreeError
):
37 "Exception raised if get_child is passed a file."
40 class SVNTypeMismatch(SVNTreeError
):
41 "Exception raised if one node is file and other is dir"
44 #========================================================================
46 # ===> Overview of our Datastructures <===
48 # The general idea here is that many, many things can be represented by
51 # - a working copy's structure and contents
52 # - the output of 'svn status'
53 # - the output of 'svn checkout/update'
54 # - the output of 'svn commit'
56 # The idea is that a test function creates a "expected" tree of some
57 # kind, and is then able to compare it to an "actual" tree that comes
58 # from running the Subversion client. This is what makes a test
59 # automated; if an actual and expected tree match exactly, then the test
60 # has passed. (See compare_trees() below.)
62 # The SVNTreeNode class is the fundamental data type used to build tree
63 # structures. The class contains a method for "dropping" a new node
64 # into an ever-growing tree structure. (See also create_from_path()).
66 # We have four parsers in this file for the four use cases listed above:
67 # each parser examines some kind of input and returns a tree of
68 # SVNTreeNode objects. (See build_tree_from_checkout(),
69 # build_tree_from_commit(), build_tree_from_status(), and
70 # build_tree_from_wc()). These trees are the "actual" trees that result
71 # from running the Subversion client.
73 # Also necessary, of course, is a convenient way for a test to create an
74 # "expected" tree. The test *could* manually construct and link a bunch
75 # of SVNTreeNodes, certainly. But instead, all the tests are using the
76 # build_generic_tree() routine instead.
78 # build_generic_tree() takes a specially-formatted list of lists as
79 # input, and returns a tree of SVNTreeNodes. The list of lists has this
82 # [ ['/full/path/to/item', 'text contents', {prop-hash}, {att-hash}],
87 # You can see that each item in the list essentially defines an
88 # SVNTreeNode. build_generic_tree() instantiates a SVNTreeNode for each
89 # item, and then drops it into a tree by parsing each item's full path.
91 # So a typical test routine spends most of its time preparing lists of
92 # this format and sending them to build_generic_tree(), rather than
93 # building the "expected" trees directly.
95 # ### Note: in the future, we'd like to remove this extra layer of
96 # ### abstraction. We'd like the SVNTreeNode class to be more
97 # ### directly programmer-friendly, providing a number of accessor
98 # ### routines, so that tests can construct trees directly.
100 # The first three fields of each list-item are self-explanatory. It's
101 # the fourth field, the "attribute" hash, that needs some explanation.
102 # The att-hash is used to place extra information about the node itself,
103 # depending on the parsing context:
105 # - in the 'svn co/up' use-case, each line of output starts with two
106 # characters from the set of (A, D, G, U, C, _) or 'Restored'. The
107 # status code is stored in a attribute named 'status'. In the case
108 # of a restored file, the word 'Restored' is stored in an attribute
111 # - in the 'svn ci/im' use-case, each line of output starts with one
112 # of the words (Adding, Deleting, Sending). This verb is stored in
113 # an attribute named 'verb'.
115 # - in the 'svn status' use-case (which is always run with the -v
116 # (--verbose) flag), each line of output contains a working revision
117 # number and a two-letter status code similar to the 'svn co/up'
118 # case. This information is stored in attributes named 'wc_rev'
119 # and 'status'. The repository revision is also printed, but it
122 # - in the working-copy use-case, the att-hash is ignored.
125 # Finally, one last explanation: the file 'actions.py' contain a number
126 # of helper routines named 'run_and_verify_FOO'. These routines take
127 # one or more "expected" trees as input, then run some svn subcommand,
128 # then push the output through an appropriate parser to derive an
129 # "actual" tree. Then it runs compare_trees() and raises an exception
130 # on failure. This is why most tests typically end with a call to
131 # run_and_verify_FOO().
135 #========================================================================
139 # If CHILDREN is None, then the node is a file. Otherwise, CHILDREN
140 # is a list of the nodes making up that directory's children.
142 # NAME is simply the name of the file or directory. CONTENTS is a
143 # string that contains the file's contents (if a file), PROPS are
144 # properties attached to files or dirs, and ATTS is a dictionary of
145 # other metadata attached to the node.
149 def __init__(self
, name
, children
=None, contents
=None, props
={}, atts
={}):
151 self
.children
= children
152 self
.contents
= contents
157 # TODO: Check to make sure contents and children are mutually exclusive
159 def add_child(self
, newchild
):
160 child_already_exists
= 0
161 if self
.children
is None: # if you're a file,
162 self
.children
= [] # become an empty dir.
164 for a
in self
.children
:
165 if a
.name
== newchild
.name
:
166 child_already_exists
= 1
169 if child_already_exists
:
170 if newchild
.children
is None:
171 # this is the 'end' of the chain, so copy any content here.
172 a
.contents
= newchild
.contents
173 a
.props
= newchild
.props
174 a
.atts
= newchild
.atts
175 a
.path
= os
.path
.join (self
.path
, newchild
.name
)
177 # try to add dangling children to your matching node
178 for i
in newchild
.children
:
181 self
.children
.append(newchild
)
182 newchild
.path
= os
.path
.join(self
.path
, newchild
.name
)
185 def pprint(self
, stream
= sys
.stdout
):
186 "Pretty-print the meta data for this node."
187 print >> stream
, " * Node name: ", self
.name
188 print >> stream
, " Path: ", self
.path
189 mime_type
= self
.props
.get("svn:mime-type")
190 if not mime_type
or mime_type
.startswith("text/"):
191 if self
.children
is not None:
192 print >> stream
, " Contents: N/A (node is a directory)"
194 print >> stream
, " Contents: ", self
.contents
196 print >> stream
, " Contents: %d bytes (binary)" % len(self
.contents
)
197 print >> stream
, " Properties:", self
.props
198 print >> stream
, " Attributes:", self
.atts
199 ### FIXME: I'd like to be able to tell the difference between
200 ### self.children is None (file) and self.children == [] (empty
201 ### directory), but it seems that most places that construct
202 ### SVNTreeNode objects don't even try to do that. --xbc
204 ### See issue #1611 about this problem. -kfogel
205 if self
.children
is not None:
206 print >> stream
, " Children: ", len(self
.children
)
208 print >> stream
, " Children: N/A (node is a file)"
212 s
= StringIO
.StringIO()
217 # reserved name of the root of the tree
218 root_node_name
= "__SVN_ROOT_NODE"
222 def add_elements_as_path(top_node
, element_list
):
223 """Add the elements in ELEMENT_LIST as if they were a single path
226 # The idea of this function is to take a list like so:
227 # ['A', 'B', 'C'] and a top node, say 'Z', and generate a tree
232 # where 1 -> 2 means 2 is a child of 1.
236 for i
in element_list
:
237 new_node
= SVNTreeNode(i
, None)
238 prev_node
.add_child(new_node
)
242 # Sorting function -- sort 2 nodes by their names.
243 def node_is_greater(a
, b
):
244 "Sort the names of two nodes."
254 # Helper for compare_trees
255 def compare_file_nodes(a
, b
):
256 """Compare two nodes' names, contents, and properties, ignoring
257 children. Return 0 if the same, 1 otherwise."""
260 if a
.contents
!= b
.contents
:
262 if a
.props
!= b
.props
:
268 # Internal utility used by most build_tree_from_foo() routines.
270 # (Take the output and .add_child() it to a root node.)
272 def create_from_path(path
, contents
=None, props
={}, atts
={}):
273 """Create and return a linked list of treenodes, given a PATH
274 representing a single entry into that tree. CONTENTS and PROPS are
275 optional arguments that will be deposited in the tail node."""
277 # get a list of all the names in the path
278 # each of these will be a child of the former
280 path
= path
.replace(os
.sep
, "/")
281 elements
= path
.split("/")
282 if len(elements
) == 0:
283 ### we should raise a less generic error here. which?
288 # if this is Windows: if the path contains a drive name (X:), make it
291 m
= re
.match("([a-zA-Z]:)(.+)", elements
[0])
293 root_node
= SVNTreeNode(m
.group(1), None)
294 elements
[0] = m
.group(2)
295 add_elements_as_path(root_node
, elements
[0:])
298 root_node
= SVNTreeNode(elements
[0], None)
299 add_elements_as_path(root_node
, elements
[1:])
301 # deposit contents in the very last node.
304 if node
.children
is None:
305 node
.contents
= contents
309 node
= node
.children
[0]
314 # helper for handle_dir(), which is a helper for build_tree_from_wc()
316 "Return a hash of props for PATH, using the svn client."
318 # It's not kosher to look inside .svn/ and try to read the internal
319 # property storage format. Instead, we use 'svn proplist'. After
320 # all, this is the only way the user can retrieve them, so we're
321 # respecting the black-box paradigm.
324 output
, errput
= main
.run_svn(1, "proplist", path
, "--verbose")
328 if line
.startswith('Properties on '):
330 # Not a misprint; "> 0" really is preferable to ">= 0" in this case.
331 if line
.find(' : ') > 0:
332 name
, value
= line
.split(' : ')
334 value
= value
.strip()
337 else: # Multi-line property, so re-use the current name.
339 # Undo, as best we can, the strip(value) that was done before
340 # we knew this was a multiline property.
341 props
[name
] = props
[name
] + "\n"
343 props
[name
] = props
[name
] + line
348 # helper for handle_dir(), which helps build_tree_from_wc()
350 "Return a string with the textual contents of a file at PATH."
353 if not os
.path
.isfile(path
):
362 # main recursive helper for build_tree_from_wc()
363 def handle_dir(path
, current_parent
, load_props
, ignore_svn
):
365 # get a list of all the files
366 all_files
= os
.listdir(path
)
370 # put dirs and files in their own lists, and remove SVN dirs
372 f
= os
.path
.join(path
, f
)
373 if (os
.path
.isdir(f
) and os
.path
.basename(f
) != main
.get_admin_name()):
375 elif os
.path
.isfile(f
):
378 # add each file as a child of CURRENT_PARENT
380 fcontents
= get_text(f
)
382 fprops
= get_props(f
)
385 current_parent
.add_child(SVNTreeNode(os
.path
.basename(f
), None,
388 # for each subdir, create a node, walk its tree, add it as a child
391 dprops
= get_props(d
)
394 new_dir_node
= SVNTreeNode(os
.path
.basename(d
), None, None, dprops
)
395 handle_dir(d
, new_dir_node
, load_props
, ignore_svn
)
396 current_parent
.add_child(new_dir_node
)
398 def get_child(node
, name
):
399 """If SVNTreeNode NODE contains a child named NAME, return child;
400 else, return None. If SVNTreeNode is not a directory, raise a
401 SVNTreeIsNotDirectory exception"""
402 if node
.children
== None:
403 raise SVNTreeIsNotDirectory
404 for n
in node
.children
:
410 # Helpers for compare_trees
411 def default_singleton_handler_a(a
, baton
):
412 "Printing SVNTreeNode A's name, then raise SVNTreeUnequal."
413 print "Couldn't find node '%s' in expected tree" % a
.name
417 def default_singleton_handler_b(b
, baton
):
418 "Printing SVNTreeNode B's name, then raise SVNTreeUnequal."
419 print "Couldn't find node '%s' in actual tree" % b
.name
423 # A test helper function implementing the singleton_handler_a API.
424 def detect_conflict_files(node
, extra_files
):
425 """NODE has been discovered, an extra file on disk. Verify that it
426 matches one of the regular expressions in the EXTRA_FILES list. If
427 it matches, remove the match from the list. If it doesn't match,
428 raise an exception."""
430 for pattern
in extra_files
:
431 mo
= re
.match(pattern
, node
.name
)
433 extra_files
.pop(extra_files
.index(pattern
)) # delete pattern from list
436 msg
= "Encountered unexpected disk path '" + node
.name
+ "'"
439 raise SVNTreeUnequal(msg
)
441 ###########################################################################
442 ###########################################################################
443 # EXPORTED ROUTINES ARE BELOW
446 # Main tree comparison routine!
448 def compare_trees(a
, b
,
449 singleton_handler_a
= None,
451 singleton_handler_b
= None,
453 """Compare SVNTreeNodes A (actual) and B (expected), expressing
454 differences using FUNC_A and FUNC_B. FUNC_A and FUNC_B are
455 functions of two arguments (a SVNTreeNode and a context baton), and
456 may raise exception SVNTreeUnequal. Their return value is ignored.
458 If A and B are both files, then return if their contents,
459 properties, and names are all the same; else raise a SVNTreeUnequal.
460 If A is a file and B is a directory, raise a SVNTreeUnequal; same
461 vice-versa. If both are directories, then for each entry that
462 exists in both, call compare_trees on the two entries; otherwise, if
463 the entry exists only in A, invoke FUNC_A on it, and likewise for
466 def display_nodes(a
, b
):
467 'Display two nodes, expected and actual.'
468 print "============================================================="
469 print "Expected '%s' and actual '%s' are different!" % (b
.name
, a
.name
)
470 print "============================================================="
471 print "EXPECTED NODE TO BE:"
472 print "============================================================="
474 print "============================================================="
475 print "ACTUAL NODE FOUND:"
476 print "============================================================="
479 # Setup singleton handlers
480 if (singleton_handler_a
is None):
481 singleton_handler_a
= default_singleton_handler_a
482 if (singleton_handler_b
is None):
483 singleton_handler_b
= default_singleton_handler_b
486 # A and B are both files.
487 if ((a
.children
is None) and (b
.children
is None)):
488 if compare_file_nodes(a
, b
):
491 # One is a file, one is a directory.
492 elif (((a
.children
is None) and (b
.children
is not None))
493 or ((a
.children
is not None) and (b
.children
is None))):
495 raise SVNTypeMismatch
496 # They're both directories.
498 # First, compare the directories' two hashes.
499 if (a
.props
!= b
.props
) or (a
.atts
!= b
.atts
):
504 # For each child of A, check and see if it's in B. If so, run
505 # compare_trees on the two children and add b's child to
506 # accounted_for. If not, run FUNC_A on the child. Next, for each
507 # child of B, check and see if it's in accounted_for. If it is,
508 # do nothing. If not, run FUNC_B on it.
509 for a_child
in a
.children
:
510 b_child
= get_child(b
, a_child
.name
)
512 accounted_for
.append(b_child
)
513 compare_trees(a_child
, b_child
,
514 singleton_handler_a
, a_baton
,
515 singleton_handler_b
, b_baton
)
517 singleton_handler_a(a_child
, a_baton
)
518 for b_child
in b
.children
:
519 if (b_child
not in accounted_for
):
520 singleton_handler_b(b_child
, b_baton
)
521 except SVNTypeMismatch
:
522 print 'Unequal Types: one Node is a file, the other is a directory'
524 except SVNTreeIsNotDirectory
:
525 print "Error: Foolish call to get_child."
528 print "Error: unequal number of children"
530 except SVNTreeUnequal
:
531 if a
.name
!= root_node_name
:
532 print "Unequal at node %s" % a
.name
537 # Visually show a tree's structure
539 def dump_tree(n
,indent
=""):
540 "Print out a nice representation of the tree's structure."
542 # Code partially stolen from Dave Beazley
543 if n
.children
is None:
546 tmp_children
= n
.children
548 if n
.name
== root_node_name
:
549 print "%s%s" % (indent
, "ROOT")
551 print "%s%s" % (indent
, n
.name
)
553 indent
= indent
.replace("-", " ")
554 indent
= indent
.replace("+", " ")
555 for i
in range(len(tmp_children
)):
557 if i
== len(tmp_children
559 dump_tree(c
,indent
+ " +-- ")
561 dump_tree(c
,indent
+ " |-- ")
564 ###################################################################
565 ###################################################################
566 # PARSERS that return trees made of SVNTreeNodes....
569 ###################################################################
570 # Build an "expected" static tree from a list of lists
573 # Create a list of lists, of the form:
575 # [ [path, contents, props, atts], ... ]
577 # and run it through this parser. PATH is a string, a path to the
578 # object. CONTENTS is either a string or None, and PROPS and ATTS are
579 # populated dictionaries or {}. Each CONTENTS/PROPS/ATTS will be
580 # attached to the basename-node of the associated PATH.
582 def build_generic_tree(nodelist
):
583 "Given a list of lists of a specific format, return a tree."
585 root
= SVNTreeNode(root_node_name
)
587 for list in nodelist
:
588 new_branch
= create_from_path(list[0], list[1], list[2], list[3])
589 root
.add_child(new_branch
)
594 ####################################################################
595 # Build trees from different kinds of subcommand output.
598 # Parse co/up output into a tree.
600 # Tree nodes will contain no contents, a 'status' att, and a
603 def build_tree_from_checkout(lines
, include_skipped
=1):
604 "Return a tree derived by parsing the output LINES from 'co' or 'up'."
606 root
= SVNTreeNode(root_node_name
)
607 rm1
= re
.compile ('^([RMAGCUDE_ ][MAGCUDE_ ])([B ])\s+(.+)')
609 rm2
= re
.compile ('^(Restored|Skipped)\s+\'(.+)\'')
611 rm2
= re
.compile ('^(Restored)\s+\'(.+)\'')
614 match
= rm1
.search(line
)
615 if match
and match
.groups():
616 new_branch
= create_from_path(match
.group(3), None, {},
617 {'status' : match
.group(1)})
618 root
.add_child(new_branch
)
620 match
= rm2
.search(line
)
621 if match
and match
.groups():
622 new_branch
= create_from_path(match
.group(2), None, {},
623 {'verb' : match
.group(1)})
624 root
.add_child(new_branch
)
629 # Parse ci/im output into a tree.
631 # Tree nodes will contain no contents, and only one 'verb' att.
633 def build_tree_from_commit(lines
):
634 "Return a tree derived by parsing the output LINES from 'ci' or 'im'."
636 # Lines typically have a verb followed by whitespace then a path.
637 root
= SVNTreeNode(root_node_name
)
638 rm1
= re
.compile ('^(\w+( \(bin\))?)\s+(.+)')
639 rm2
= re
.compile ('^Transmitting')
642 match
= rm2
.search(line
)
644 match
= rm1
.search(line
)
645 if match
and match
.groups():
646 new_branch
= create_from_path(match
.group(3), None, {},
647 {'verb' : match
.group(1)})
648 root
.add_child(new_branch
)
653 # Parse status output into a tree.
655 # Tree nodes will contain no contents, and these atts:
657 # 'status', 'wc_rev',
658 # ... and possibly 'locked', 'copied', 'writelocked',
659 # IFF columns non-empty.
662 def build_tree_from_status(lines
):
663 "Return a tree derived by parsing the output LINES from 'st -vuq'."
665 root
= SVNTreeNode(root_node_name
)
667 # 'status -v' output looks like this:
669 # "%c%c%c%c%c%c %c %6s %6s %-12s %s\n"
671 # (Taken from 'print_status' in subversion/svn/status.c.)
673 # Here are the parameters. The middle number in parens is the
674 # match.group(), followed by a brief description of the field:
676 # - text status (1) (single letter)
677 # - prop status (1) (single letter)
678 # - wc-lockedness flag (2) (single letter: "L" or " ")
679 # - copied flag (3) (single letter: "+" or " ")
680 # - switched flag (4) (single letter: "S" or " ")
681 # - repos lock status (5) (single letter: "K", "O", "B", "T", " ")
685 # - out-of-date flag (6) (single letter: "*" or " ")
689 # - working revision (7) (either digits or "-")
693 # - last-changed revision (8) (either digits or "?")
697 # - last author (9) (string of non-whitespace characters)
701 # - path (10) (string of characters until newline)
703 # Try http://www.wordsmith.org/anagram/anagram.cgi?anagram=ACDRMGU
704 rm
= re
.compile('^([!MACDRUG_ ][MACDRUG_ ])([L ])([+ ])([S ])([KOBT ]) ([* ]) [^0-9-]*(\d+|-|\?) +(\d|-|\?)+ +(\S+) +(.+)')
707 # Quit when we hit an externals status announcement (### someday we can fix
708 # the externals tests to expect the additional flood of externals status
710 if re
.match(r
'^Performing', line
):
713 match
= rm
.search(line
)
714 if match
and match
.groups():
715 if match
.group(9) != '-': # ignore items that only exist on repos
716 atthash
= {'status' : match
.group(1),
717 'wc_rev' : match
.group(7)}
718 if match
.group(2) != ' ':
719 atthash
['locked'] = match
.group(2)
720 if match
.group(3) != ' ':
721 atthash
['copied'] = match
.group(3)
722 if match
.group(4) != ' ':
723 atthash
['switched'] = match
.group(4)
724 if match
.group(5) != ' ':
725 atthash
['writelocked'] = match
.group(5)
726 new_branch
= create_from_path(match
.group(10), None, {}, atthash
)
728 root
.add_child(new_branch
)
733 # Parse merge "skipped" output
735 def build_tree_from_skipped(lines
):
737 root
= SVNTreeNode(root_node_name
)
738 ### Will get confused by spaces in the filename
739 rm
= re
.compile ("^Skipped.* '([^ ]+)'\n")
742 match
= rm
.search(line
)
743 if match
and match
.groups():
744 new_branch
= create_from_path(match
.group(1))
745 root
.add_child(new_branch
)
749 def build_tree_from_diff_summarize(lines
):
750 "Build a tree from output of diff --summarize"
751 root
= SVNTreeNode(root_node_name
)
752 rm
= re
.compile ("^([MAD ][M ]) (.+)\n")
755 match
= rm
.search(line
)
756 if match
and match
.groups():
757 new_branch
= create_from_path(match
.group(2),
758 atts
={'status': match
.group(1)})
759 root
.add_child(new_branch
)
763 ####################################################################
764 # Build trees by looking at the working copy
767 # The reason the 'load_props' flag is off by default is because it
768 # creates a drastic slowdown -- we spawn a new 'svn proplist'
769 # process for every file and dir in the working copy!
772 def build_tree_from_wc(wc_path
, load_props
=0, ignore_svn
=1):
773 """Takes WC_PATH as the path to a working copy. Walks the tree below
774 that path, and creates the tree based on the actual found
775 files. If IGNORE_SVN is true, then exclude SVN admin dirs from the tree.
776 If LOAD_PROPS is true, the props will be added to the tree."""
778 root
= SVNTreeNode(root_node_name
, None)
780 # if necessary, store the root dir's props in a new child node '.'.
782 props
= get_props(wc_path
)
784 root_dir_node
= SVNTreeNode(os
.path
.basename('.'), None, None, props
)
785 root
.add_child(root_dir_node
)
787 # Walk the tree recursively
788 handle_dir(os
.path
.normpath(wc_path
), root
, load_props
, ignore_svn
)