Fix various cosmetic and documentation errors.
[svnrdump.git] / svntest / tree.py
blob6fc57276790a01468214d82726a4417b625cddb5
1 #!/usr/bin/env python
3 # tree.py: tools for comparing directory trees
5 # Subversion is a tool for revision control.
6 # See http://subversion.tigris.org for more information.
7 #
8 # ====================================================================
9 # Copyright (c) 2001 CollabNet. All rights reserved.
11 # This software is licensed as described in the file COPYING, which
12 # you should have received as part of this distribution. The terms
13 # are also available at http://subversion.tigris.org/license-1.html.
14 # If newer versions of this license are posted there, you may use a
15 # newer version instead, at your option.
17 ######################################################################
19 import re
20 import string
21 import os.path
23 import main # the general svntest routines in this module.
24 from svntest import Failure
26 # Tree Exceptions.
28 # All tree exceptions should inherit from SVNTreeError
29 class SVNTreeError(Failure):
30 "Exception raised if you screw up in the tree module."
31 pass
33 class SVNTreeUnequal(SVNTreeError):
34 "Exception raised if two trees are unequal."
35 pass
37 class SVNTreeIsNotDirectory(SVNTreeError):
38 "Exception raised if get_child is passed a file."
39 pass
41 class SVNTypeMismatch(SVNTreeError):
42 "Exception raised if one node is file and other is dir"
43 pass
45 #========================================================================
47 # ===> Overview of our Datastructures <===
49 # The general idea here is that many, many things can be represented by
50 # a tree structure:
52 # - a working copy's structure and contents
53 # - the output of 'svn status'
54 # - the output of 'svn checkout/update'
55 # - the output of 'svn commit'
57 # The idea is that a test function creates a "expected" tree of some
58 # kind, and is then able to compare it to an "actual" tree that comes
59 # from running the Subversion client. This is what makes a test
60 # automated; if an actual and expected tree match exactly, then the test
61 # has passed. (See compare_trees() below.)
63 # The SVNTreeNode class is the fundamental data type used to build tree
64 # structures. The class contains a method for "dropping" a new node
65 # into an ever-growing tree structure. (See also create_from_path()).
67 # We have four parsers in this file for the four use cases listed above:
68 # each parser examines some kind of input and returns a tree of
69 # SVNTreeNode objects. (See build_tree_from_checkout(),
70 # build_tree_from_commit(), build_tree_from_status(), and
71 # build_tree_from_wc()). These trees are the "actual" trees that result
72 # from running the Subversion client.
74 # Also necessary, of course, is a convenient way for a test to create an
75 # "expected" tree. The test *could* manually construct and link a bunch
76 # of SVNTreeNodes, certainly. But instead, all the tests are using the
77 # build_generic_tree() routine instead.
79 # build_generic_tree() takes a specially-formatted list of lists as
80 # input, and returns a tree of SVNTreeNodes. The list of lists has this
81 # structure:
83 # [ ['/full/path/to/item', 'text contents', {prop-hash}, {att-hash}],
84 # [...],
85 # [...],
86 # ... ]
88 # You can see that each item in the list essentially defines an
89 # SVNTreeNode. build_generic_tree() instantiates a SVNTreeNode for each
90 # item, and then drops it into a tree by parsing each item's full path.
92 # So a typical test routine spends most of its time preparing lists of
93 # this format and sending them to build_generic_tree(), rather than
94 # building the "expected" trees directly.
96 # ### Note: in the future, we'd like to remove this extra layer of
97 # ### abstraction. We'd like the SVNTreeNode class to be more
98 # ### directly programmer-friendly, providing a number of accessor
99 # ### routines, so that tests can construct trees directly.
101 # The first three fields of each list-item are self-explanatory. It's
102 # the fourth field, the "attribute" hash, that needs some explanation.
103 # The att-hash is used to place extra information about the node itself,
104 # depending on the parsing context:
106 # - in the 'svn co/up' use-case, each line of output starts with two
107 # characters from the set of (A, D, G, U, C, _) or 'Restored'. The
108 # status code is stored in a attribute named 'status'. In the case
109 # of a restored file, the word 'Restored' is stored in an attribute
110 # named 'verb'.
112 # - in the 'svn ci/im' use-case, each line of output starts with one
113 # of the words (Adding, Deleting, Sending). This verb is stored in
114 # an attribute named 'verb'.
116 # - in the 'svn status' use-case (which is always run with the -v
117 # (--verbose) flag), each line of output contains a working revision
118 # number and a two-letter status code similar to the 'svn co/up'
119 # case. This information is stored in attributes named 'wc_rev'
120 # and 'status'. The repository revision is also printed, but it
121 # is ignored.
123 # - in the working-copy use-case, the att-hash is ignored.
126 # Finally, one last explanation: the file 'actions.py' contain a number
127 # of helper routines named 'run_and_verify_FOO'. These routines take
128 # one or more "expected" trees as input, then run some svn subcommand,
129 # then push the output through an appropriate parser to derive an
130 # "actual" tree. Then it runs compare_trees() and raises an exception
131 # on failure. This is why most tests typically end with a call to
132 # run_and_verify_FOO().
136 #========================================================================
138 # A node in a tree.
140 # If CHILDREN is None, then the node is a file. Otherwise, CHILDREN
141 # is a list of the nodes making up that directory's children.
143 # NAME is simply the name of the file or directory. CONTENTS is a
144 # string that contains the file's contents (if a file), PROPS are
145 # properties attached to files or dirs, and ATTS is a dictionary of
146 # other metadata attached to the node.
148 class SVNTreeNode:
150 def __init__(self, name, children=None, contents=None, props={}, atts={}):
151 self.name = name
152 self.children = children
153 self.contents = contents
154 self.props = props
155 self.atts = atts
156 self.path = name
158 # TODO: Check to make sure contents and children are mutually exclusive
160 def add_child(self, newchild):
161 if self.children is None: # if you're a file,
162 self.children = [] # become an empty dir.
163 child_already_exists = 0
164 for a in self.children:
165 if a.name == newchild.name:
166 child_already_exists = 1
167 break
168 if child_already_exists == 0:
169 self.children.append(newchild)
170 newchild.path = os.path.join (self.path, newchild.name)
172 # If you already have the node,
173 else:
174 if newchild.children is None:
175 # this is the 'end' of the chain, so copy any content here.
176 a.contents = newchild.contents
177 a.props = newchild.props
178 a.atts = newchild.atts
179 a.path = os.path.join (self.path, newchild.name)
180 else:
181 # try to add dangling children to your matching node
182 for i in newchild.children:
183 a.add_child(i)
186 def pprint(self):
187 print " * Node name: ", self.name
188 print " Path: ", self.path
189 print " Contents: ", self.contents
190 print " Properties:", self.props
191 print " Attributes:", self.atts
192 ### FIXME: I'd like to be able to tell the difference between
193 ### self.children is None (file) and self.children == [] (empty
194 ### directory), but it seems that most places that construct
195 ### SVNTreeNode objects don't even try to do that. --xbc
197 ### See issue #1611 about this problem. -kfogel
198 if self.children is not None:
199 print " Children: ", len(self.children)
200 else:
201 print " Children: is a file."
203 # reserved name of the root of the tree
204 root_node_name = "__SVN_ROOT_NODE"
207 # helper func
208 def add_elements_as_path(top_node, element_list):
209 """Add the elements in ELEMENT_LIST as if they were a single path
210 below TOP_NODE."""
212 # The idea of this function is to take a list like so:
213 # ['A', 'B', 'C'] and a top node, say 'Z', and generate a tree
214 # like this:
216 # Z -> A -> B -> C
218 # where 1 -> 2 means 2 is a child of 1.
221 prev_node = top_node
222 for i in element_list:
223 new_node = SVNTreeNode(i, None)
224 prev_node.add_child(new_node)
225 prev_node = new_node
228 # Sorting function -- sort 2 nodes by their names.
229 def node_is_greater(a, b):
230 "Sort the names of two nodes."
231 # Interal use only
232 if a.name == b.name:
233 return 0
234 if a.name > b.name:
235 return 1
236 else:
237 return -1
240 # Helper for compare_trees
241 def compare_file_nodes(a, b):
242 """Compare two nodes' names, contents, and properties, ignoring
243 children. Return 0 if the same, 1 otherwise."""
244 if a.name != b.name:
245 return 1
246 if a.contents != b.contents:
247 return 1
248 if a.props != b.props:
249 return 1
250 if a.atts != b.atts:
251 return 1
254 # Internal utility used by most build_tree_from_foo() routines.
256 # (Take the output and .add_child() it to a root node.)
258 def create_from_path(path, contents=None, props={}, atts={}):
259 """Create and return a linked list of treenodes, given a PATH
260 representing a single entry into that tree. CONTENTS and PROPS are
261 optional arguments that will be deposited in the tail node."""
263 # get a list of all the names in the path
264 # each of these will be a child of the former
265 if os.sep != "/":
266 path = string.replace(path, os.sep, "/")
267 elements = path.split("/")
268 if len(elements) == 0:
269 ### we should raise a less generic error here. which?
270 raise SVNTreeError
272 root_node = SVNTreeNode(elements[0], None)
274 add_elements_as_path(root_node, elements[1:])
276 # deposit contents in the very last node.
277 node = root_node
278 while 1:
279 if node.children is None:
280 node.contents = contents
281 node.props = props
282 node.atts = atts
283 break
284 node = node.children[0]
286 return root_node
289 # helper for handle_dir(), which is a helper for build_tree_from_wc()
290 def get_props(path):
291 "Return a hash of props for PATH, using the svn client."
293 # It's not kosher to look inside .svn/ and try to read the internal
294 # property storage format. Instead, we use 'svn proplist'. After
295 # all, this is the only way the user can retrieve them, so we're
296 # respecting the black-box paradigm.
298 props = {}
299 output, errput = main.run_svn(1, "proplist", path, "--verbose")
301 first_value = 0
302 for line in output:
303 if line.startswith('Properties on '):
304 continue
305 # Not a misprint; "> 0" really is preferable to ">= 0" in this case.
306 if line.find(' : ') > 0:
307 name, value = line.split(' : ')
308 name = string.strip(name)
309 value = string.strip(value)
310 props[name] = value
311 first_value = 1
312 else: # Multi-line property, so re-use the current name.
313 if first_value:
314 # Undo, as best we can, the strip(value) that was done before
315 # we knew this was a multiline property.
316 props[name] = props[name] + "\n"
317 first_value = 0
318 props[name] = props[name] + line
320 return props
323 # helper for handle_dir(), which helps build_tree_from_wc()
324 def get_text(path):
325 "Return a string with the textual contents of a file at PATH."
327 # sanity check
328 if not os.path.isfile(path):
329 return None
331 fp = open(path, 'r')
332 contents = fp.read()
333 fp.close()
334 return contents
337 # main recursive helper for build_tree_from_wc()
338 def handle_dir(path, current_parent, load_props, ignore_svn):
340 # get a list of all the files
341 all_files = os.listdir(path)
342 files = []
343 dirs = []
345 # put dirs and files in their own lists, and remove SVN dirs
346 for f in all_files:
347 f = os.path.join(path, f)
348 if (os.path.isdir(f) and os.path.basename(f) != main.get_admin_name()):
349 dirs.append(f)
350 elif os.path.isfile(f):
351 files.append(f)
353 # add each file as a child of CURRENT_PARENT
354 for f in files:
355 fcontents = get_text(f)
356 if load_props:
357 fprops = get_props(f)
358 else:
359 fprops = {}
360 current_parent.add_child(SVNTreeNode(os.path.basename(f), None,
361 fcontents, fprops))
363 # for each subdir, create a node, walk its tree, add it as a child
364 for d in dirs:
365 if load_props:
366 dprops = get_props(d)
367 else:
368 dprops = {}
369 new_dir_node = SVNTreeNode(os.path.basename(d), None, None, dprops)
370 handle_dir(d, new_dir_node, load_props, ignore_svn)
371 current_parent.add_child(new_dir_node)
373 def get_child(node, name):
374 """If SVNTreeNode NODE contains a child named NAME, return child;
375 else, return None. If SVNTreeNode is not a directory, raise a
376 SVNTreeIsNotDirectory exception"""
377 if node.children == None:
378 raise SVNTreeIsNotDirectory
379 for n in node.children:
380 if (name == n.name):
381 return n
382 return None
385 # Helpers for compare_trees
386 def default_singleton_handler_a(a, baton):
387 "Printing SVNTreeNode A's name, then raise SVNTreeUnequal."
388 print "Got singleton from actual tree:", a.name
389 a.pprint()
390 raise SVNTreeUnequal
392 def default_singleton_handler_b(b, baton):
393 "Printing SVNTreeNode B's name, then raise SVNTreeUnequal."
394 print "Got singleton from expected tree:", b.name
395 b.pprint()
396 raise SVNTreeUnequal
399 ###########################################################################
400 ###########################################################################
401 # EXPORTED ROUTINES ARE BELOW
404 # Main tree comparison routine!
406 def compare_trees(a, b,
407 singleton_handler_a = None,
408 a_baton = None,
409 singleton_handler_b = None,
410 b_baton = None):
411 """Compare SVNTreeNodes A and B, expressing differences using FUNC_A
412 and FUNC_B. FUNC_A and FUNC_B are functions of two arguments (a
413 SVNTreeNode and a context baton), and may raise exception
414 SVNTreeUnequal. Their return value is ignored.
416 If A and B are both files, then return if their contents,
417 properties, and names are all the same; else raise a SVNTreeUnequal.
418 If A is a file and B is a directory, raise a SVNTreeUnequal; same
419 vice-versa. If both are directories, then for each entry that
420 exists in both, call compare_trees on the two entries; otherwise, if
421 the entry exists only in A, invoke FUNC_A on it, and likewise for
422 B with FUNC_B."""
424 def display_nodes(a, b):
425 'Display two nodes, expected and actual.'
426 print "============================================================="
427 print "Expected", b.name, "and actual", a.name, "are different!"
428 print "============================================================="
429 print "EXPECTED NODE TO BE:"
430 print "============================================================="
431 b.pprint()
432 print "============================================================="
433 print "ACTUAL NODE FOUND:"
434 print "============================================================="
435 a.pprint()
437 # Setup singleton handlers
438 if (singleton_handler_a is None):
439 singleton_handler_a = default_singleton_handler_a
440 if (singleton_handler_b is None):
441 singleton_handler_b = default_singleton_handler_b
443 try:
444 # A and B are both files.
445 if ((a.children is None) and (b.children is None)):
446 if compare_file_nodes(a, b):
447 display_nodes(a, b)
448 raise SVNTreeUnequal
449 # One is a file, one is a directory.
450 elif (((a.children is None) and (b.children is not None))
451 or ((a.children is not None) and (b.children is None))):
452 display_nodes(a, b)
453 raise SVNTypeMismatch
454 # They're both directories.
455 else:
456 # First, compare the directories' two hashes.
457 if (a.props != b.props) or (a.atts != b.atts):
458 display_nodes(a, b)
459 raise SVNTreeUnequal
461 accounted_for = []
462 # For each child of A, check and see if it's in B. If so, run
463 # compare_trees on the two children and add b's child to
464 # accounted_for. If not, run FUNC_A on the child. Next, for each
465 # child of B, check and see if it's in accounted_for. If it is,
466 # do nothing. If not, run FUNC_B on it.
467 for a_child in a.children:
468 b_child = get_child(b, a_child.name)
469 if b_child:
470 accounted_for.append(b_child)
471 compare_trees(a_child, b_child,
472 singleton_handler_a, a_baton,
473 singleton_handler_b, b_baton)
474 else:
475 singleton_handler_a(a_child, a_baton)
476 for b_child in b.children:
477 if (b_child not in accounted_for):
478 singleton_handler_b(b_child, b_baton)
479 except SVNTypeMismatch:
480 print 'Unequal Types: one Node is a file, the other is a directory'
481 raise SVNTreeUnequal
482 except SVNTreeIsNotDirectory:
483 print "Error: Foolish call to get_child."
484 sys.exit(1)
485 except IndexError:
486 print "Error: unequal number of children"
487 raise SVNTreeUnequal
488 except SVNTreeUnequal:
489 if a.name == root_node_name:
490 raise SVNTreeUnequal
491 else:
492 print "Unequal at node %s" % a.name
493 raise SVNTreeUnequal
499 # Visually show a tree's structure
501 def dump_tree(n,indent=""):
502 "Print out a nice representation of the tree's structure."
504 # Code partially stolen from Dave Beazley
505 if n.children is None:
506 tmp_children = []
507 else:
508 tmp_children = n.children
510 if n.name == root_node_name:
511 print "%s%s" % (indent, "ROOT")
512 else:
513 print "%s%s" % (indent, n.name)
515 indent = string.replace(indent, "-", " ")
516 indent = string.replace(indent, "+", " ")
517 for i in range(len(tmp_children)):
518 c = tmp_children[i]
519 if i == len(tmp_children
520 )-1:
521 dump_tree(c,indent + " +-- ")
522 else:
523 dump_tree(c,indent + " |-- ")
526 ###################################################################
527 ###################################################################
528 # PARSERS that return trees made of SVNTreeNodes....
531 ###################################################################
532 # Build an "expected" static tree from a list of lists
535 # Create a list of lists, of the form:
537 # [ [path, contents, props, atts], ... ]
539 # and run it through this parser. PATH is a string, a path to the
540 # object. CONTENTS is either a string or None, and PROPS and ATTS are
541 # populated dictionaries or {}. Each CONTENTS/PROPS/ATTS will be
542 # attached to the basename-node of the associated PATH.
544 def build_generic_tree(nodelist):
545 "Given a list of lists of a specific format, return a tree."
547 root = SVNTreeNode(root_node_name)
549 for list in nodelist:
550 new_branch = create_from_path(list[0], list[1], list[2], list[3])
551 root.add_child(new_branch)
553 return root
556 ####################################################################
557 # Build trees from different kinds of subcommand output.
560 # Parse co/up output into a tree.
562 # Tree nodes will contain no contents, a 'status' att, and a
563 # 'writelocked' att.
565 def build_tree_from_checkout(lines):
566 "Return a tree derived by parsing the output LINES from 'co' or 'up'."
568 root = SVNTreeNode(root_node_name)
569 rm1 = re.compile ('^([MAGCUD_ ][MAGCUD_ ])([B ])\s+(.+)')
570 # There may be other verbs we need to match, in addition to
571 # "Restored". If so, add them as alternatives in the first match
572 # group below.
573 rm2 = re.compile ('^(Restored)\s+\'(.+)\'')
575 for line in lines:
576 match = rm1.search(line)
577 if match and match.groups():
578 new_branch = create_from_path(match.group(3), None, {},
579 {'status' : match.group(1)})
580 root.add_child(new_branch)
581 else:
582 match = rm2.search(line)
583 if match and match.groups():
584 new_branch = create_from_path(match.group(2), None, {},
585 {'verb' : match.group(1)})
586 root.add_child(new_branch)
588 return root
591 # Parse ci/im output into a tree.
593 # Tree nodes will contain no contents, and only one 'verb' att.
595 def build_tree_from_commit(lines):
596 "Return a tree derived by parsing the output LINES from 'ci' or 'im'."
598 # Lines typically have a verb followed by whitespace then a path.
599 root = SVNTreeNode(root_node_name)
600 rm1 = re.compile ('^(\w+( \(bin\))?)\s+(.+)')
601 rm2 = re.compile ('^Transmitting')
603 for line in lines:
604 match = rm2.search(line)
605 if not match:
606 match = rm1.search(line)
607 if match and match.groups():
608 new_branch = create_from_path(match.group(3), None, {},
609 {'verb' : match.group(1)})
610 root.add_child(new_branch)
612 return root
615 # Parse status output into a tree.
617 # Tree nodes will contain no contents, and these atts:
619 # 'status', 'wc_rev',
620 # ... and possibly 'locked', 'copied', 'writelocked',
621 # IFF columns non-empty.
624 def build_tree_from_status(lines):
625 "Return a tree derived by parsing the output LINES from 'st -vuq'."
627 root = SVNTreeNode(root_node_name)
629 # 'status -v' output looks like this:
631 # "%c%c%c%c%c%c %c %6s %6s %-12s %s\n"
633 # (Taken from 'print_status' in subversion/svn/status.c.)
635 # Here are the parameters. The middle number in parens is the
636 # match.group(), followed by a brief description of the field:
638 # - text status (1) (single letter)
639 # - prop status (1) (single letter)
640 # - wc-lockedness flag (2) (single letter: "L" or " ")
641 # - copied flag (3) (single letter: "+" or " ")
642 # - switched flag (4) (single letter: "S" or " ")
643 # - repos lock status (5) (single letter: "K", "O", "B", "T", " ")
645 # [one space]
647 # - out-of-date flag (6) (single letter: "*" or " ")
649 # [three spaces]
651 # - working revision (7) (either digits or "-")
653 # [one space]
655 # - last-changed revision (8) (either digits or "?")
657 # [one space]
659 # - last author (9) (string of non-whitespace characters)
661 # [one space]
663 # - path (10) (string of characters until newline)
665 # Try http://www.wordsmith.org/anagram/anagram.cgi?anagram=ACDRMGU
666 rm = re.compile('^([!MACDRUG_ ][MACDRUG_ ])([L ])([+ ])([S ])([KOBT ]) ([* ]) [^0-9-]*(\d+|-|\?) +(\d|-|\?)+ +(\S+) +(.+)')
667 for line in lines:
669 # Quit when we hit an externals status announcement (### someday we can fix
670 # the externals tests to expect the additional flood of externals status
671 # data).
672 if re.match(r'^Performing', line):
673 break
675 match = rm.search(line)
676 if match and match.groups():
677 if match.group(9) != '-': # ignore items that only exist on repos
678 atthash = {'status' : match.group(1),
679 'wc_rev' : match.group(7)}
680 if match.group(2) != ' ':
681 atthash['locked'] = match.group(2)
682 if match.group(3) != ' ':
683 atthash['copied'] = match.group(3)
684 if match.group(4) != ' ':
685 atthash['switched'] = match.group(4)
686 if match.group(5) != ' ':
687 atthash['writelocked'] = match.group(5)
688 new_branch = create_from_path(match.group(10), None, {}, atthash)
690 root.add_child(new_branch)
692 return root
695 # Parse merge "skipped" output
697 def build_tree_from_skipped(lines):
699 root = SVNTreeNode(root_node_name)
700 ### Will get confused by spaces in the filename
701 rm = re.compile ("^Skipped.* '([^ ]+)'\n")
703 for line in lines:
704 match = rm.search(line)
705 if match and match.groups():
706 new_branch = create_from_path(match.group(1))
707 root.add_child(new_branch)
709 return root
712 ####################################################################
713 # Build trees by looking at the working copy
716 # The reason the 'load_props' flag is off by default is because it
717 # creates a drastic slowdown -- we spawn a new 'svn proplist'
718 # process for every file and dir in the working copy!
721 def build_tree_from_wc(wc_path, load_props=0, ignore_svn=1):
722 """Takes WC_PATH as the path to a working copy. Walks the tree below
723 that path, and creates the tree based on the actual found
724 files. If IGNORE_SVN is true, then exclude SVN admin dirs from the tree.
725 If LOAD_PROPS is true, the props will be added to the tree."""
727 root = SVNTreeNode(root_node_name, None)
729 # if necessary, store the root dir's props in the root node.
730 if load_props:
731 root.props = get_props(wc_path)
733 # Walk the tree recursively
734 handle_dir(os.path.normpath(wc_path), root, load_props, ignore_svn)
736 return root
738 ### End of file.