3 # tree.py: tools for comparing directory trees
5 # Subversion is a tool for revision control.
6 # See http://subversion.tigris.org for more information.
8 # ====================================================================
9 # Copyright (c) 2001 CollabNet. All rights reserved.
11 # This software is licensed as described in the file COPYING, which
12 # you should have received as part of this distribution. The terms
13 # are also available at http://subversion.tigris.org/license-1.html.
14 # If newer versions of this license are posted there, you may use a
15 # newer version instead, at your option.
17 ######################################################################
23 import main
# the general svntest routines in this module.
24 from svntest
import Failure
28 # All tree exceptions should inherit from SVNTreeError
29 class SVNTreeError(Failure
):
30 "Exception raised if you screw up in the tree module."
33 class SVNTreeUnequal(SVNTreeError
):
34 "Exception raised if two trees are unequal."
37 class SVNTreeIsNotDirectory(SVNTreeError
):
38 "Exception raised if get_child is passed a file."
41 class SVNTypeMismatch(SVNTreeError
):
42 "Exception raised if one node is file and other is dir"
45 #========================================================================
47 # ===> Overview of our Datastructures <===
49 # The general idea here is that many, many things can be represented by
52 # - a working copy's structure and contents
53 # - the output of 'svn status'
54 # - the output of 'svn checkout/update'
55 # - the output of 'svn commit'
57 # The idea is that a test function creates a "expected" tree of some
58 # kind, and is then able to compare it to an "actual" tree that comes
59 # from running the Subversion client. This is what makes a test
60 # automated; if an actual and expected tree match exactly, then the test
61 # has passed. (See compare_trees() below.)
63 # The SVNTreeNode class is the fundamental data type used to build tree
64 # structures. The class contains a method for "dropping" a new node
65 # into an ever-growing tree structure. (See also create_from_path()).
67 # We have four parsers in this file for the four use cases listed above:
68 # each parser examines some kind of input and returns a tree of
69 # SVNTreeNode objects. (See build_tree_from_checkout(),
70 # build_tree_from_commit(), build_tree_from_status(), and
71 # build_tree_from_wc()). These trees are the "actual" trees that result
72 # from running the Subversion client.
74 # Also necessary, of course, is a convenient way for a test to create an
75 # "expected" tree. The test *could* manually construct and link a bunch
76 # of SVNTreeNodes, certainly. But instead, all the tests are using the
77 # build_generic_tree() routine instead.
79 # build_generic_tree() takes a specially-formatted list of lists as
80 # input, and returns a tree of SVNTreeNodes. The list of lists has this
83 # [ ['/full/path/to/item', 'text contents', {prop-hash}, {att-hash}],
88 # You can see that each item in the list essentially defines an
89 # SVNTreeNode. build_generic_tree() instantiates a SVNTreeNode for each
90 # item, and then drops it into a tree by parsing each item's full path.
92 # So a typical test routine spends most of its time preparing lists of
93 # this format and sending them to build_generic_tree(), rather than
94 # building the "expected" trees directly.
96 # ### Note: in the future, we'd like to remove this extra layer of
97 # ### abstraction. We'd like the SVNTreeNode class to be more
98 # ### directly programmer-friendly, providing a number of accessor
99 # ### routines, so that tests can construct trees directly.
101 # The first three fields of each list-item are self-explanatory. It's
102 # the fourth field, the "attribute" hash, that needs some explanation.
103 # The att-hash is used to place extra information about the node itself,
104 # depending on the parsing context:
106 # - in the 'svn co/up' use-case, each line of output starts with two
107 # characters from the set of (A, D, G, U, C, _) or 'Restored'. The
108 # status code is stored in a attribute named 'status'. In the case
109 # of a restored file, the word 'Restored' is stored in an attribute
112 # - in the 'svn ci/im' use-case, each line of output starts with one
113 # of the words (Adding, Deleting, Sending). This verb is stored in
114 # an attribute named 'verb'.
116 # - in the 'svn status' use-case (which is always run with the -v
117 # (--verbose) flag), each line of output contains a working revision
118 # number and a two-letter status code similar to the 'svn co/up'
119 # case. This information is stored in attributes named 'wc_rev'
120 # and 'status'. The repository revision is also printed, but it
123 # - in the working-copy use-case, the att-hash is ignored.
126 # Finally, one last explanation: the file 'actions.py' contain a number
127 # of helper routines named 'run_and_verify_FOO'. These routines take
128 # one or more "expected" trees as input, then run some svn subcommand,
129 # then push the output through an appropriate parser to derive an
130 # "actual" tree. Then it runs compare_trees() and raises an exception
131 # on failure. This is why most tests typically end with a call to
132 # run_and_verify_FOO().
136 #========================================================================
140 # If CHILDREN is None, then the node is a file. Otherwise, CHILDREN
141 # is a list of the nodes making up that directory's children.
143 # NAME is simply the name of the file or directory. CONTENTS is a
144 # string that contains the file's contents (if a file), PROPS are
145 # properties attached to files or dirs, and ATTS is a dictionary of
146 # other metadata attached to the node.
150 def __init__(self
, name
, children
=None, contents
=None, props
={}, atts
={}):
152 self
.children
= children
153 self
.contents
= contents
158 # TODO: Check to make sure contents and children are mutually exclusive
160 def add_child(self
, newchild
):
161 if self
.children
is None: # if you're a file,
162 self
.children
= [] # become an empty dir.
163 child_already_exists
= 0
164 for a
in self
.children
:
165 if a
.name
== newchild
.name
:
166 child_already_exists
= 1
168 if child_already_exists
== 0:
169 self
.children
.append(newchild
)
170 newchild
.path
= os
.path
.join (self
.path
, newchild
.name
)
172 # If you already have the node,
174 if newchild
.children
is None:
175 # this is the 'end' of the chain, so copy any content here.
176 a
.contents
= newchild
.contents
177 a
.props
= newchild
.props
178 a
.atts
= newchild
.atts
179 a
.path
= os
.path
.join (self
.path
, newchild
.name
)
181 # try to add dangling children to your matching node
182 for i
in newchild
.children
:
187 print " * Node name: ", self
.name
188 print " Path: ", self
.path
189 print " Contents: ", self
.contents
190 print " Properties:", self
.props
191 print " Attributes:", self
.atts
192 ### FIXME: I'd like to be able to tell the difference between
193 ### self.children is None (file) and self.children == [] (empty
194 ### directory), but it seems that most places that construct
195 ### SVNTreeNode objects don't even try to do that. --xbc
197 ### See issue #1611 about this problem. -kfogel
198 if self
.children
is not None:
199 print " Children: ", len(self
.children
)
201 print " Children: is a file."
203 # reserved name of the root of the tree
204 root_node_name
= "__SVN_ROOT_NODE"
208 def add_elements_as_path(top_node
, element_list
):
209 """Add the elements in ELEMENT_LIST as if they were a single path
212 # The idea of this function is to take a list like so:
213 # ['A', 'B', 'C'] and a top node, say 'Z', and generate a tree
218 # where 1 -> 2 means 2 is a child of 1.
222 for i
in element_list
:
223 new_node
= SVNTreeNode(i
, None)
224 prev_node
.add_child(new_node
)
228 # Sorting function -- sort 2 nodes by their names.
229 def node_is_greater(a
, b
):
230 "Sort the names of two nodes."
240 # Helper for compare_trees
241 def compare_file_nodes(a
, b
):
242 """Compare two nodes' names, contents, and properties, ignoring
243 children. Return 0 if the same, 1 otherwise."""
246 if a
.contents
!= b
.contents
:
248 if a
.props
!= b
.props
:
254 # Internal utility used by most build_tree_from_foo() routines.
256 # (Take the output and .add_child() it to a root node.)
258 def create_from_path(path
, contents
=None, props
={}, atts
={}):
259 """Create and return a linked list of treenodes, given a PATH
260 representing a single entry into that tree. CONTENTS and PROPS are
261 optional arguments that will be deposited in the tail node."""
263 # get a list of all the names in the path
264 # each of these will be a child of the former
266 path
= string
.replace(path
, os
.sep
, "/")
267 elements
= path
.split("/")
268 if len(elements
) == 0:
269 ### we should raise a less generic error here. which?
272 root_node
= SVNTreeNode(elements
[0], None)
274 add_elements_as_path(root_node
, elements
[1:])
276 # deposit contents in the very last node.
279 if node
.children
is None:
280 node
.contents
= contents
284 node
= node
.children
[0]
289 # helper for handle_dir(), which is a helper for build_tree_from_wc()
291 "Return a hash of props for PATH, using the svn client."
293 # It's not kosher to look inside .svn/ and try to read the internal
294 # property storage format. Instead, we use 'svn proplist'. After
295 # all, this is the only way the user can retrieve them, so we're
296 # respecting the black-box paradigm.
299 output
, errput
= main
.run_svn(1, "proplist", path
, "--verbose")
303 if line
.startswith('Properties on '):
305 # Not a misprint; "> 0" really is preferable to ">= 0" in this case.
306 if line
.find(' : ') > 0:
307 name
, value
= line
.split(' : ')
308 name
= string
.strip(name
)
309 value
= string
.strip(value
)
312 else: # Multi-line property, so re-use the current name.
314 # Undo, as best we can, the strip(value) that was done before
315 # we knew this was a multiline property.
316 props
[name
] = props
[name
] + "\n"
318 props
[name
] = props
[name
] + line
323 # helper for handle_dir(), which helps build_tree_from_wc()
325 "Return a string with the textual contents of a file at PATH."
328 if not os
.path
.isfile(path
):
337 # main recursive helper for build_tree_from_wc()
338 def handle_dir(path
, current_parent
, load_props
, ignore_svn
):
340 # get a list of all the files
341 all_files
= os
.listdir(path
)
345 # put dirs and files in their own lists, and remove SVN dirs
347 f
= os
.path
.join(path
, f
)
348 if (os
.path
.isdir(f
) and os
.path
.basename(f
) != main
.get_admin_name()):
350 elif os
.path
.isfile(f
):
353 # add each file as a child of CURRENT_PARENT
355 fcontents
= get_text(f
)
357 fprops
= get_props(f
)
360 current_parent
.add_child(SVNTreeNode(os
.path
.basename(f
), None,
363 # for each subdir, create a node, walk its tree, add it as a child
366 dprops
= get_props(d
)
369 new_dir_node
= SVNTreeNode(os
.path
.basename(d
), None, None, dprops
)
370 handle_dir(d
, new_dir_node
, load_props
, ignore_svn
)
371 current_parent
.add_child(new_dir_node
)
373 def get_child(node
, name
):
374 """If SVNTreeNode NODE contains a child named NAME, return child;
375 else, return None. If SVNTreeNode is not a directory, raise a
376 SVNTreeIsNotDirectory exception"""
377 if node
.children
== None:
378 raise SVNTreeIsNotDirectory
379 for n
in node
.children
:
385 # Helpers for compare_trees
386 def default_singleton_handler_a(a
, baton
):
387 "Printing SVNTreeNode A's name, then raise SVNTreeUnequal."
388 print "Got singleton from actual tree:", a
.name
392 def default_singleton_handler_b(b
, baton
):
393 "Printing SVNTreeNode B's name, then raise SVNTreeUnequal."
394 print "Got singleton from expected tree:", b
.name
399 ###########################################################################
400 ###########################################################################
401 # EXPORTED ROUTINES ARE BELOW
404 # Main tree comparison routine!
406 def compare_trees(a
, b
,
407 singleton_handler_a
= None,
409 singleton_handler_b
= None,
411 """Compare SVNTreeNodes A and B, expressing differences using FUNC_A
412 and FUNC_B. FUNC_A and FUNC_B are functions of two arguments (a
413 SVNTreeNode and a context baton), and may raise exception
414 SVNTreeUnequal. Their return value is ignored.
416 If A and B are both files, then return if their contents,
417 properties, and names are all the same; else raise a SVNTreeUnequal.
418 If A is a file and B is a directory, raise a SVNTreeUnequal; same
419 vice-versa. If both are directories, then for each entry that
420 exists in both, call compare_trees on the two entries; otherwise, if
421 the entry exists only in A, invoke FUNC_A on it, and likewise for
424 def display_nodes(a
, b
):
425 'Display two nodes, expected and actual.'
426 print "============================================================="
427 print "Expected", b
.name
, "and actual", a
.name
, "are different!"
428 print "============================================================="
429 print "EXPECTED NODE TO BE:"
430 print "============================================================="
432 print "============================================================="
433 print "ACTUAL NODE FOUND:"
434 print "============================================================="
437 # Setup singleton handlers
438 if (singleton_handler_a
is None):
439 singleton_handler_a
= default_singleton_handler_a
440 if (singleton_handler_b
is None):
441 singleton_handler_b
= default_singleton_handler_b
444 # A and B are both files.
445 if ((a
.children
is None) and (b
.children
is None)):
446 if compare_file_nodes(a
, b
):
449 # One is a file, one is a directory.
450 elif (((a
.children
is None) and (b
.children
is not None))
451 or ((a
.children
is not None) and (b
.children
is None))):
453 raise SVNTypeMismatch
454 # They're both directories.
456 # First, compare the directories' two hashes.
457 if (a
.props
!= b
.props
) or (a
.atts
!= b
.atts
):
462 # For each child of A, check and see if it's in B. If so, run
463 # compare_trees on the two children and add b's child to
464 # accounted_for. If not, run FUNC_A on the child. Next, for each
465 # child of B, check and see if it's in accounted_for. If it is,
466 # do nothing. If not, run FUNC_B on it.
467 for a_child
in a
.children
:
468 b_child
= get_child(b
, a_child
.name
)
470 accounted_for
.append(b_child
)
471 compare_trees(a_child
, b_child
,
472 singleton_handler_a
, a_baton
,
473 singleton_handler_b
, b_baton
)
475 singleton_handler_a(a_child
, a_baton
)
476 for b_child
in b
.children
:
477 if (b_child
not in accounted_for
):
478 singleton_handler_b(b_child
, b_baton
)
479 except SVNTypeMismatch
:
480 print 'Unequal Types: one Node is a file, the other is a directory'
482 except SVNTreeIsNotDirectory
:
483 print "Error: Foolish call to get_child."
486 print "Error: unequal number of children"
488 except SVNTreeUnequal
:
489 if a
.name
== root_node_name
:
492 print "Unequal at node %s" % a
.name
499 # Visually show a tree's structure
501 def dump_tree(n
,indent
=""):
502 "Print out a nice representation of the tree's structure."
504 # Code partially stolen from Dave Beazley
505 if n
.children
is None:
508 tmp_children
= n
.children
510 if n
.name
== root_node_name
:
511 print "%s%s" % (indent
, "ROOT")
513 print "%s%s" % (indent
, n
.name
)
515 indent
= string
.replace(indent
, "-", " ")
516 indent
= string
.replace(indent
, "+", " ")
517 for i
in range(len(tmp_children
)):
519 if i
== len(tmp_children
521 dump_tree(c
,indent
+ " +-- ")
523 dump_tree(c
,indent
+ " |-- ")
526 ###################################################################
527 ###################################################################
528 # PARSERS that return trees made of SVNTreeNodes....
531 ###################################################################
532 # Build an "expected" static tree from a list of lists
535 # Create a list of lists, of the form:
537 # [ [path, contents, props, atts], ... ]
539 # and run it through this parser. PATH is a string, a path to the
540 # object. CONTENTS is either a string or None, and PROPS and ATTS are
541 # populated dictionaries or {}. Each CONTENTS/PROPS/ATTS will be
542 # attached to the basename-node of the associated PATH.
544 def build_generic_tree(nodelist
):
545 "Given a list of lists of a specific format, return a tree."
547 root
= SVNTreeNode(root_node_name
)
549 for list in nodelist
:
550 new_branch
= create_from_path(list[0], list[1], list[2], list[3])
551 root
.add_child(new_branch
)
556 ####################################################################
557 # Build trees from different kinds of subcommand output.
560 # Parse co/up output into a tree.
562 # Tree nodes will contain no contents, a 'status' att, and a
565 def build_tree_from_checkout(lines
):
566 "Return a tree derived by parsing the output LINES from 'co' or 'up'."
568 root
= SVNTreeNode(root_node_name
)
569 rm1
= re
.compile ('^([MAGCUD_ ][MAGCUD_ ])([B ])\s+(.+)')
570 # There may be other verbs we need to match, in addition to
571 # "Restored". If so, add them as alternatives in the first match
573 rm2
= re
.compile ('^(Restored)\s+\'(.+)\'')
576 match
= rm1
.search(line
)
577 if match
and match
.groups():
578 new_branch
= create_from_path(match
.group(3), None, {},
579 {'status' : match
.group(1)})
580 root
.add_child(new_branch
)
582 match
= rm2
.search(line
)
583 if match
and match
.groups():
584 new_branch
= create_from_path(match
.group(2), None, {},
585 {'verb' : match
.group(1)})
586 root
.add_child(new_branch
)
591 # Parse ci/im output into a tree.
593 # Tree nodes will contain no contents, and only one 'verb' att.
595 def build_tree_from_commit(lines
):
596 "Return a tree derived by parsing the output LINES from 'ci' or 'im'."
598 # Lines typically have a verb followed by whitespace then a path.
599 root
= SVNTreeNode(root_node_name
)
600 rm1
= re
.compile ('^(\w+( \(bin\))?)\s+(.+)')
601 rm2
= re
.compile ('^Transmitting')
604 match
= rm2
.search(line
)
606 match
= rm1
.search(line
)
607 if match
and match
.groups():
608 new_branch
= create_from_path(match
.group(3), None, {},
609 {'verb' : match
.group(1)})
610 root
.add_child(new_branch
)
615 # Parse status output into a tree.
617 # Tree nodes will contain no contents, and these atts:
619 # 'status', 'wc_rev',
620 # ... and possibly 'locked', 'copied', 'writelocked',
621 # IFF columns non-empty.
624 def build_tree_from_status(lines
):
625 "Return a tree derived by parsing the output LINES from 'st -vuq'."
627 root
= SVNTreeNode(root_node_name
)
629 # 'status -v' output looks like this:
631 # "%c%c%c%c%c%c %c %6s %6s %-12s %s\n"
633 # (Taken from 'print_status' in subversion/svn/status.c.)
635 # Here are the parameters. The middle number in parens is the
636 # match.group(), followed by a brief description of the field:
638 # - text status (1) (single letter)
639 # - prop status (1) (single letter)
640 # - wc-lockedness flag (2) (single letter: "L" or " ")
641 # - copied flag (3) (single letter: "+" or " ")
642 # - switched flag (4) (single letter: "S" or " ")
643 # - repos lock status (5) (single letter: "K", "O", "B", "T", " ")
647 # - out-of-date flag (6) (single letter: "*" or " ")
651 # - working revision (7) (either digits or "-")
655 # - last-changed revision (8) (either digits or "?")
659 # - last author (9) (string of non-whitespace characters)
663 # - path (10) (string of characters until newline)
665 # Try http://www.wordsmith.org/anagram/anagram.cgi?anagram=ACDRMGU
666 rm
= re
.compile('^([!MACDRUG_ ][MACDRUG_ ])([L ])([+ ])([S ])([KOBT ]) ([* ]) [^0-9-]*(\d+|-|\?) +(\d|-|\?)+ +(\S+) +(.+)')
669 # Quit when we hit an externals status announcement (### someday we can fix
670 # the externals tests to expect the additional flood of externals status
672 if re
.match(r
'^Performing', line
):
675 match
= rm
.search(line
)
676 if match
and match
.groups():
677 if match
.group(9) != '-': # ignore items that only exist on repos
678 atthash
= {'status' : match
.group(1),
679 'wc_rev' : match
.group(7)}
680 if match
.group(2) != ' ':
681 atthash
['locked'] = match
.group(2)
682 if match
.group(3) != ' ':
683 atthash
['copied'] = match
.group(3)
684 if match
.group(4) != ' ':
685 atthash
['switched'] = match
.group(4)
686 if match
.group(5) != ' ':
687 atthash
['writelocked'] = match
.group(5)
688 new_branch
= create_from_path(match
.group(10), None, {}, atthash
)
690 root
.add_child(new_branch
)
695 # Parse merge "skipped" output
697 def build_tree_from_skipped(lines
):
699 root
= SVNTreeNode(root_node_name
)
700 ### Will get confused by spaces in the filename
701 rm
= re
.compile ("^Skipped.* '([^ ]+)'\n")
704 match
= rm
.search(line
)
705 if match
and match
.groups():
706 new_branch
= create_from_path(match
.group(1))
707 root
.add_child(new_branch
)
712 ####################################################################
713 # Build trees by looking at the working copy
716 # The reason the 'load_props' flag is off by default is because it
717 # creates a drastic slowdown -- we spawn a new 'svn proplist'
718 # process for every file and dir in the working copy!
721 def build_tree_from_wc(wc_path
, load_props
=0, ignore_svn
=1):
722 """Takes WC_PATH as the path to a working copy. Walks the tree below
723 that path, and creates the tree based on the actual found
724 files. If IGNORE_SVN is true, then exclude SVN admin dirs from the tree.
725 If LOAD_PROPS is true, the props will be added to the tree."""
727 root
= SVNTreeNode(root_node_name
, None)
729 # if necessary, store the root dir's props in the root node.
731 root
.props
= get_props(wc_path
)
733 # Walk the tree recursively
734 handle_dir(os
.path
.normpath(wc_path
), root
, load_props
, ignore_svn
)