2 # tree.py: tools for comparing directory trees
4 # Subversion is a tool for revision control.
5 # See http://subversion.tigris.org for more information.
7 # ====================================================================
8 # Copyright (c) 2001, 2006 CollabNet. All rights reserved.
10 # This software is licensed as described in the file COPYING, which
11 # you should have received as part of this distribution. The terms
12 # are also available at http://subversion.tigris.org/license-1.html.
13 # If newer versions of this license are posted there, you may use a
14 # newer version instead, at your option.
16 ######################################################################
22 import main
# the general svntest routines in this module.
23 from svntest
import Failure
27 # All tree exceptions should inherit from SVNTreeError
28 class SVNTreeError(Failure
):
29 "Exception raised if you screw up in the tree module."
32 class SVNTreeUnequal(SVNTreeError
):
33 "Exception raised if two trees are unequal."
36 class SVNTreeIsNotDirectory(SVNTreeError
):
37 "Exception raised if get_child is passed a file."
40 class SVNTypeMismatch(SVNTreeError
):
41 "Exception raised if one node is file and other is dir"
44 #========================================================================
46 # ===> Overview of our Datastructures <===
48 # The general idea here is that many, many things can be represented by
51 # - a working copy's structure and contents
52 # - the output of 'svn status'
53 # - the output of 'svn checkout/update'
54 # - the output of 'svn commit'
56 # The idea is that a test function creates a "expected" tree of some
57 # kind, and is then able to compare it to an "actual" tree that comes
58 # from running the Subversion client. This is what makes a test
59 # automated; if an actual and expected tree match exactly, then the test
60 # has passed. (See compare_trees() below.)
62 # The SVNTreeNode class is the fundamental data type used to build tree
63 # structures. The class contains a method for "dropping" a new node
64 # into an ever-growing tree structure. (See also create_from_path()).
66 # We have four parsers in this file for the four use cases listed above:
67 # each parser examines some kind of input and returns a tree of
68 # SVNTreeNode objects. (See build_tree_from_checkout(),
69 # build_tree_from_commit(), build_tree_from_status(), and
70 # build_tree_from_wc()). These trees are the "actual" trees that result
71 # from running the Subversion client.
73 # Also necessary, of course, is a convenient way for a test to create an
74 # "expected" tree. The test *could* manually construct and link a bunch
75 # of SVNTreeNodes, certainly. But instead, all the tests are using the
76 # build_generic_tree() routine instead.
78 # build_generic_tree() takes a specially-formatted list of lists as
79 # input, and returns a tree of SVNTreeNodes. The list of lists has this
82 # [ ['/full/path/to/item', 'text contents', {prop-hash}, {att-hash}],
87 # You can see that each item in the list essentially defines an
88 # SVNTreeNode. build_generic_tree() instantiates a SVNTreeNode for each
89 # item, and then drops it into a tree by parsing each item's full path.
91 # So a typical test routine spends most of its time preparing lists of
92 # this format and sending them to build_generic_tree(), rather than
93 # building the "expected" trees directly.
95 # ### Note: in the future, we'd like to remove this extra layer of
96 # ### abstraction. We'd like the SVNTreeNode class to be more
97 # ### directly programmer-friendly, providing a number of accessor
98 # ### routines, so that tests can construct trees directly.
100 # The first three fields of each list-item are self-explanatory. It's
101 # the fourth field, the "attribute" hash, that needs some explanation.
102 # The att-hash is used to place extra information about the node itself,
103 # depending on the parsing context:
105 # - in the 'svn co/up' use-case, each line of output starts with two
106 # characters from the set of (A, D, G, U, C, _) or 'Restored'. The
107 # status code is stored in a attribute named 'status'. In the case
108 # of a restored file, the word 'Restored' is stored in an attribute
111 # - in the 'svn ci/im' use-case, each line of output starts with one
112 # of the words (Adding, Deleting, Sending). This verb is stored in
113 # an attribute named 'verb'.
115 # - in the 'svn status' use-case (which is always run with the -v
116 # (--verbose) flag), each line of output contains a working revision
117 # number and a two-letter status code similar to the 'svn co/up'
118 # case. This information is stored in attributes named 'wc_rev'
119 # and 'status'. The repository revision is also printed, but it
122 # - in the working-copy use-case, the att-hash is ignored.
125 # Finally, one last explanation: the file 'actions.py' contain a number
126 # of helper routines named 'run_and_verify_FOO'. These routines take
127 # one or more "expected" trees as input, then run some svn subcommand,
128 # then push the output through an appropriate parser to derive an
129 # "actual" tree. Then it runs compare_trees() and raises an exception
130 # on failure. This is why most tests typically end with a call to
131 # run_and_verify_FOO().
135 #========================================================================
139 # If CHILDREN is None, then the node is a file. Otherwise, CHILDREN
140 # is a list of the nodes making up that directory's children.
142 # NAME is simply the name of the file or directory. CONTENTS is a
143 # string that contains the file's contents (if a file), PROPS are
144 # properties attached to files or dirs, and ATTS is a dictionary of
145 # other metadata attached to the node.
149 def __init__(self
, name
, children
=None, contents
=None, props
={}, atts
={}):
151 self
.children
= children
152 self
.contents
= contents
157 # TODO: Check to make sure contents and children are mutually exclusive
159 def add_child(self
, newchild
):
160 child_already_exists
= 0
161 if self
.children
is None: # if you're a file,
162 self
.children
= [] # become an empty dir.
164 for a
in self
.children
:
165 if a
.name
== newchild
.name
:
166 child_already_exists
= 1
169 if child_already_exists
:
170 if newchild
.children
is None:
171 # this is the 'end' of the chain, so copy any content here.
172 a
.contents
= newchild
.contents
173 a
.props
= newchild
.props
174 a
.atts
= newchild
.atts
175 a
.path
= os
.path
.join (self
.path
, newchild
.name
)
177 # try to add dangling children to your matching node
178 for i
in newchild
.children
:
181 self
.children
.append(newchild
)
182 newchild
.path
= os
.path
.join(self
.path
, newchild
.name
)
185 def pprint(self
, stream
= sys
.stdout
):
186 "Pretty-print the meta data for this node."
187 print >> stream
, " * Node name: ", self
.name
188 print >> stream
, " Path: ", self
.path
189 mime_type
= self
.props
.get("svn:mime-type")
190 if not mime_type
or mime_type
.startswith("text/"):
191 if self
.children
is not None:
192 print >> stream
, " Contents: N/A (node is a directory)"
194 print >> stream
, " Contents: ", self
.contents
196 print >> stream
, " Contents: %d bytes (binary)" % len(self
.contents
)
197 print >> stream
, " Properties:", self
.props
198 print >> stream
, " Attributes:", self
.atts
199 ### FIXME: I'd like to be able to tell the difference between
200 ### self.children is None (file) and self.children == [] (empty
201 ### directory), but it seems that most places that construct
202 ### SVNTreeNode objects don't even try to do that. --xbc
204 ### See issue #1611 about this problem. -kfogel
205 if self
.children
is not None:
206 print >> stream
, " Children: ", len(self
.children
)
208 print >> stream
, " Children: N/A (node is a file)"
212 s
= StringIO
.StringIO()
217 # reserved name of the root of the tree
218 root_node_name
= "__SVN_ROOT_NODE"
222 def add_elements_as_path(top_node
, element_list
):
223 """Add the elements in ELEMENT_LIST as if they were a single path
226 # The idea of this function is to take a list like so:
227 # ['A', 'B', 'C'] and a top node, say 'Z', and generate a tree
232 # where 1 -> 2 means 2 is a child of 1.
236 for i
in element_list
:
237 new_node
= SVNTreeNode(i
, None)
238 prev_node
.add_child(new_node
)
242 # Sorting function -- sort 2 nodes by their names.
243 def node_is_greater(a
, b
):
244 "Sort the names of two nodes."
254 # Helper for compare_trees
255 def compare_file_nodes(a
, b
):
256 """Compare two nodes' names, contents, and properties, ignoring
257 children. Return 0 if the same, 1 otherwise."""
260 if a
.contents
!= b
.contents
:
262 if a
.props
!= b
.props
:
268 # Internal utility used by most build_tree_from_foo() routines.
270 # (Take the output and .add_child() it to a root node.)
272 def create_from_path(path
, contents
=None, props
={}, atts
={}):
273 """Create and return a linked list of treenodes, given a PATH
274 representing a single entry into that tree. CONTENTS and PROPS are
275 optional arguments that will be deposited in the tail node."""
277 # get a list of all the names in the path
278 # each of these will be a child of the former
280 path
= path
.replace(os
.sep
, "/")
281 elements
= path
.split("/")
282 if len(elements
) == 0:
283 ### we should raise a less generic error here. which?
288 # if this is Windows: if the path contains a drive name (X:), make it
291 m
= re
.match("([a-zA-Z]:)(.+)", elements
[0])
293 root_node
= SVNTreeNode(m
.group(1), None)
294 elements
[0] = m
.group(2)
295 add_elements_as_path(root_node
, elements
[0:])
298 root_node
= SVNTreeNode(elements
[0], None)
299 add_elements_as_path(root_node
, elements
[1:])
301 # deposit contents in the very last node.
304 if node
.children
is None:
305 node
.contents
= contents
309 node
= node
.children
[0]
314 # helper for handle_dir(), which is a helper for build_tree_from_wc()
316 "Return a hash of props for PATH, using the svn client."
318 # It's not kosher to look inside .svn/ and try to read the internal
319 # property storage format. Instead, we use 'svn proplist'. After
320 # all, this is the only way the user can retrieve them, so we're
321 # respecting the black-box paradigm.
324 exit_code
, output
, errput
= main
.run_svn(1, "proplist", path
, "--verbose")
328 if line
.startswith('Properties on '):
330 # Not a misprint; "> 0" really is preferable to ">= 0" in this case.
331 if line
.find(' : ') > 0:
332 name
, value
= line
.split(' : ')
334 value
= value
.strip()
337 else: # Multi-line property, so re-use the current name.
339 # Undo, as best we can, the strip(value) that was done before
340 # we knew this was a multiline property.
341 props
[name
] = props
[name
] + "\n"
343 # Keep the line endings consistent with what was done to the first
344 # line by stripping whitespace and then appending a newline. This
345 # prevents multiline props on Windows that must be stored as UTF8/LF
346 # in the repository (e.g. svn:mergeinfo), say like this:
348 # "propname : propvalLine1<LF>propvalLine2<LF>propvalLine3"
350 # but that print to stdout like this:
352 # Properties on 'somepath':<CR><LF>
353 # propname : propvalLine1<CR><CR><LF>
354 # propvalLine1<CR><CR><LF>
355 # propvalLine1<CR><LF>
357 # from looking like this in the returned PROPS hash:
359 # "propname" --> "propvalLine1<LF>propvalLine2<CR><LF>propvalLine3<LF>"
360 props
[name
] = props
[name
] + line
.strip() + "\n"
365 # helper for handle_dir(), which helps build_tree_from_wc()
367 "Return a string with the textual contents of a file at PATH."
370 if not os
.path
.isfile(path
):
379 # main recursive helper for build_tree_from_wc()
380 def handle_dir(path
, current_parent
, load_props
, ignore_svn
):
382 # get a list of all the files
383 all_files
= os
.listdir(path
)
387 # put dirs and files in their own lists, and remove SVN dirs
389 f
= os
.path
.join(path
, f
)
390 if (os
.path
.isdir(f
) and os
.path
.basename(f
) != main
.get_admin_name()):
392 elif os
.path
.isfile(f
):
395 # add each file as a child of CURRENT_PARENT
397 fcontents
= get_text(f
)
399 fprops
= get_props(f
)
402 current_parent
.add_child(SVNTreeNode(os
.path
.basename(f
), None,
405 # for each subdir, create a node, walk its tree, add it as a child
408 dprops
= get_props(d
)
411 new_dir_node
= SVNTreeNode(os
.path
.basename(d
), None, None, dprops
)
412 handle_dir(d
, new_dir_node
, load_props
, ignore_svn
)
413 current_parent
.add_child(new_dir_node
)
415 def get_child(node
, name
):
416 """If SVNTreeNode NODE contains a child named NAME, return child;
417 else, return None. If SVNTreeNode is not a directory, raise a
418 SVNTreeIsNotDirectory exception"""
419 if node
.children
== None:
420 raise SVNTreeIsNotDirectory
421 for n
in node
.children
:
427 # Helper for compare_trees
428 def default_singleton_handler(node
, description
):
429 """Print SVNTreeNode NODE's name, describing it with the string
430 DESCRIPTION, then raise SVNTreeUnequal."""
431 print "Couldn't find node '%s' in %s tree" % (node
.name
, description
)
435 # A test helper function implementing the singleton_handler_a API.
436 def detect_conflict_files(node
, extra_files
):
437 """NODE has been discovered, an extra file on disk. Verify that it
438 matches one of the regular expressions in the EXTRA_FILES list. If
439 it matches, remove the match from the list. If it doesn't match,
440 raise an exception."""
442 for pattern
in extra_files
:
443 mo
= re
.match(pattern
, node
.name
)
445 extra_files
.pop(extra_files
.index(pattern
)) # delete pattern from list
448 msg
= "Encountered unexpected disk path '" + node
.name
+ "'"
451 raise SVNTreeUnequal(msg
)
453 ###########################################################################
454 ###########################################################################
455 # EXPORTED ROUTINES ARE BELOW
458 # Main tree comparison routine!
460 def compare_trees(label
,
462 singleton_handler_a
= None,
464 singleton_handler_b
= None,
466 """Compare SVNTreeNodes A (actual) and B (expected), expressing
467 differences using FUNC_A and FUNC_B. FUNC_A and FUNC_B are
468 functions of two arguments (a SVNTreeNode and a context baton), and
469 may raise exception SVNTreeUnequal, in which case they use the
470 string LABEL to describe the error (their return value is ignored).
471 LABEL is typically "output", "disk", "status", or some other word
472 that labels the trees being compared.
474 If A and B are both files, then return if their contents,
475 properties, and names are all the same; else raise a SVNTreeUnequal.
476 If A is a file and B is a directory, raise a SVNTreeUnequal; same
477 vice-versa. If both are directories, then for each entry that
478 exists in both, call compare_trees on the two entries; otherwise, if
479 the entry exists only in A, invoke FUNC_A on it, and likewise for
482 def display_nodes(a
, b
):
483 'Display two nodes, expected and actual.'
484 print "============================================================="
485 print "Expected '%s' and actual '%s' in %s tree are different!" \
486 % (b
.name
, a
.name
, label
)
487 print "============================================================="
488 print "EXPECTED NODE TO BE:"
489 print "============================================================="
491 print "============================================================="
492 print "ACTUAL NODE FOUND:"
493 print "============================================================="
496 # Setup singleton handlers
497 if (singleton_handler_a
is None):
498 singleton_handler_a
= default_singleton_handler
499 a_baton
= "expected " + label
500 if (singleton_handler_b
is None):
501 singleton_handler_b
= default_singleton_handler
502 b_baton
= "actual " + label
505 # A and B are both files.
506 if ((a
.children
is None) and (b
.children
is None)):
507 if compare_file_nodes(a
, b
):
510 # One is a file, one is a directory.
511 elif (((a
.children
is None) and (b
.children
is not None))
512 or ((a
.children
is not None) and (b
.children
is None))):
514 raise SVNTypeMismatch
515 # They're both directories.
517 # First, compare the directories' two hashes.
518 if (a
.props
!= b
.props
) or (a
.atts
!= b
.atts
):
523 # For each child of A, check and see if it's in B. If so, run
524 # compare_trees on the two children and add b's child to
525 # accounted_for. If not, run FUNC_A on the child. Next, for each
526 # child of B, check and see if it's in accounted_for. If it is,
527 # do nothing. If not, run FUNC_B on it.
528 for a_child
in a
.children
:
529 b_child
= get_child(b
, a_child
.name
)
531 accounted_for
.append(b_child
)
532 compare_trees(label
, a_child
, b_child
,
533 singleton_handler_a
, a_baton
,
534 singleton_handler_b
, b_baton
)
536 singleton_handler_a(a_child
, a_baton
)
537 for b_child
in b
.children
:
538 if (b_child
not in accounted_for
):
539 singleton_handler_b(b_child
, b_baton
)
540 except SVNTypeMismatch
:
541 print 'Unequal Types: one Node is a file, the other is a directory'
543 except SVNTreeIsNotDirectory
:
544 print "Error: Foolish call to get_child."
547 print "Error: unequal number of children"
549 except SVNTreeUnequal
:
550 if a
.name
!= root_node_name
:
551 print "Unequal at node %s" % a
.name
556 # Visually show a tree's structure
558 def dump_tree(n
,indent
=""):
559 "Print out a nice representation of the tree's structure."
561 # Code partially stolen from Dave Beazley
562 if n
.children
is None:
565 tmp_children
= n
.children
567 if n
.name
== root_node_name
:
568 print "%s%s" % (indent
, "ROOT")
570 print "%s%s" % (indent
, n
.name
)
572 indent
= indent
.replace("-", " ")
573 indent
= indent
.replace("+", " ")
574 for i
in range(len(tmp_children
)):
576 if i
== len(tmp_children
578 dump_tree(c
,indent
+ " +-- ")
580 dump_tree(c
,indent
+ " |-- ")
583 ###################################################################
584 ###################################################################
585 # PARSERS that return trees made of SVNTreeNodes....
588 ###################################################################
589 # Build an "expected" static tree from a list of lists
592 # Create a list of lists, of the form:
594 # [ [path, contents, props, atts], ... ]
596 # and run it through this parser. PATH is a string, a path to the
597 # object. CONTENTS is either a string or None, and PROPS and ATTS are
598 # populated dictionaries or {}. Each CONTENTS/PROPS/ATTS will be
599 # attached to the basename-node of the associated PATH.
601 def build_generic_tree(nodelist
):
602 "Given a list of lists of a specific format, return a tree."
604 root
= SVNTreeNode(root_node_name
)
606 for list in nodelist
:
607 new_branch
= create_from_path(list[0], list[1], list[2], list[3])
608 root
.add_child(new_branch
)
613 ####################################################################
614 # Build trees from different kinds of subcommand output.
617 # Parse co/up output into a tree.
619 # Tree nodes will contain no contents, a 'status' att, and a
622 def build_tree_from_checkout(lines
, include_skipped
=1):
623 "Return a tree derived by parsing the output LINES from 'co' or 'up'."
625 root
= SVNTreeNode(root_node_name
)
626 rm1
= re
.compile ('^([RMAGCUDE_ ][MAGCUDE_ ])([B ])\s+(.+)')
628 rm2
= re
.compile ('^(Restored|Skipped)\s+\'(.+)\'')
630 rm2
= re
.compile ('^(Restored)\s+\'(.+)\'')
633 match
= rm1
.search(line
)
634 if match
and match
.groups():
635 new_branch
= create_from_path(match
.group(3), None, {},
636 {'status' : match
.group(1)})
637 root
.add_child(new_branch
)
639 match
= rm2
.search(line
)
640 if match
and match
.groups():
641 new_branch
= create_from_path(match
.group(2), None, {},
642 {'verb' : match
.group(1)})
643 root
.add_child(new_branch
)
648 # Parse ci/im output into a tree.
650 # Tree nodes will contain no contents, and only one 'verb' att.
652 def build_tree_from_commit(lines
):
653 "Return a tree derived by parsing the output LINES from 'ci' or 'im'."
655 # Lines typically have a verb followed by whitespace then a path.
656 root
= SVNTreeNode(root_node_name
)
657 rm1
= re
.compile ('^(\w+( \(bin\))?)\s+(.+)')
658 rm2
= re
.compile ('^Transmitting')
661 match
= rm2
.search(line
)
663 match
= rm1
.search(line
)
664 if match
and match
.groups():
665 new_branch
= create_from_path(match
.group(3), None, {},
666 {'verb' : match
.group(1)})
667 root
.add_child(new_branch
)
672 # Parse status output into a tree.
674 # Tree nodes will contain no contents, and these atts:
676 # 'status', 'wc_rev',
677 # ... and possibly 'locked', 'copied', 'writelocked',
678 # IFF columns non-empty.
681 def build_tree_from_status(lines
):
682 "Return a tree derived by parsing the output LINES from 'st -vuq'."
684 root
= SVNTreeNode(root_node_name
)
686 # 'status -v' output looks like this:
688 # "%c%c%c%c%c%c %c %6s %6s %-12s %s\n"
690 # (Taken from 'print_status' in subversion/svn/status.c.)
692 # Here are the parameters. The middle number in parens is the
693 # match.group(), followed by a brief description of the field:
695 # - text status (1) (single letter)
696 # - prop status (1) (single letter)
697 # - wc-lockedness flag (2) (single letter: "L" or " ")
698 # - copied flag (3) (single letter: "+" or " ")
699 # - switched flag (4) (single letter: "S" or " ")
700 # - repos lock status (5) (single letter: "K", "O", "B", "T", " ")
704 # - out-of-date flag (6) (single letter: "*" or " ")
708 # - working revision (7) (either digits or "-")
712 # - last-changed revision (8) (either digits or "?")
716 # - last author (9) (string of non-whitespace characters)
720 # - path (10) (string of characters until newline)
722 # Try http://www.wordsmith.org/anagram/anagram.cgi?anagram=ACDRMGU
723 rm
= re
.compile('^([!MACDRUG_ ][MACDRUG_ ])([L ])([+ ])([S ])([KOBT ]) ([* ]) [^0-9-]*(\d+|-|\?) +(\d|-|\?)+ +(\S+) +(.+)')
726 # Quit when we hit an externals status announcement (### someday we can fix
727 # the externals tests to expect the additional flood of externals status
729 if re
.match(r
'^Performing', line
):
732 match
= rm
.search(line
)
733 if match
and match
.groups():
734 if match
.group(9) != '-': # ignore items that only exist on repos
735 atthash
= {'status' : match
.group(1),
736 'wc_rev' : match
.group(7)}
737 if match
.group(2) != ' ':
738 atthash
['locked'] = match
.group(2)
739 if match
.group(3) != ' ':
740 atthash
['copied'] = match
.group(3)
741 if match
.group(4) != ' ':
742 atthash
['switched'] = match
.group(4)
743 if match
.group(5) != ' ':
744 atthash
['writelocked'] = match
.group(5)
745 new_branch
= create_from_path(match
.group(10), None, {}, atthash
)
747 root
.add_child(new_branch
)
752 # Parse merge "skipped" output
754 def build_tree_from_skipped(lines
):
756 root
= SVNTreeNode(root_node_name
)
757 ### Will get confused by spaces in the filename
758 rm
= re
.compile ("^Skipped.* '([^ ]+)'\n")
761 match
= rm
.search(line
)
762 if match
and match
.groups():
763 new_branch
= create_from_path(match
.group(1))
764 root
.add_child(new_branch
)
768 def build_tree_from_diff_summarize(lines
):
769 "Build a tree from output of diff --summarize"
770 root
= SVNTreeNode(root_node_name
)
771 rm
= re
.compile ("^([MAD ][M ]) (.+)\n")
774 match
= rm
.search(line
)
775 if match
and match
.groups():
776 new_branch
= create_from_path(match
.group(2),
777 atts
={'status': match
.group(1)})
778 root
.add_child(new_branch
)
782 ####################################################################
783 # Build trees by looking at the working copy
786 # The reason the 'load_props' flag is off by default is because it
787 # creates a drastic slowdown -- we spawn a new 'svn proplist'
788 # process for every file and dir in the working copy!
791 def build_tree_from_wc(wc_path
, load_props
=0, ignore_svn
=1):
792 """Takes WC_PATH as the path to a working copy. Walks the tree below
793 that path, and creates the tree based on the actual found
794 files. If IGNORE_SVN is true, then exclude SVN admin dirs from the tree.
795 If LOAD_PROPS is true, the props will be added to the tree."""
797 root
= SVNTreeNode(root_node_name
, None)
799 # if necessary, store the root dir's props in a new child node '.'.
801 props
= get_props(wc_path
)
803 root_dir_node
= SVNTreeNode(os
.path
.basename('.'), None, None, props
)
804 root
.add_child(root_dir_node
)
806 # Walk the tree recursively
807 handle_dir(os
.path
.normpath(wc_path
), root
, load_props
, ignore_svn
)