2 # wc.py: functions for interacting with a Subversion working copy
4 # Subversion is a tool for revision control.
5 # See http://subversion.tigris.org for more information.
7 # ====================================================================
8 # Licensed to the Apache Software Foundation (ASF) under one
9 # or more contributor license agreements. See the NOTICE file
10 # distributed with this work for additional information
11 # regarding copyright ownership. The ASF licenses this file
12 # to you under the Apache License, Version 2.0 (the
13 # "License"); you may not use this file except in compliance
14 # with the License. You may obtain a copy of the License at
16 # http://www.apache.org/licenses/LICENSE-2.0
18 # Unless required by applicable law or agreed to in writing,
19 # software distributed under the License is distributed on an
20 # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
21 # KIND, either express or implied. See the License for the
22 # specific language governing permissions and limitations
24 ######################################################################
35 # 'status -v' output looks like this:
37 # "%c%c%c%c%c%c%c %c %6s %6s %-12s %s\n"
39 # (Taken from 'print_status' in subversion/svn/status.c.)
41 # Here are the parameters. The middle number or string in parens is the
42 # match.group(), followed by a brief description of the field:
44 # - text status (1) (single letter)
45 # - prop status (1) (single letter)
46 # - wc-lockedness flag (2) (single letter: "L" or " ")
47 # - copied flag (3) (single letter: "+" or " ")
48 # - switched flag (4) (single letter: "S", "X" or " ")
49 # - repos lock status (5) (single letter: "K", "O", "B", "T", " ")
50 # - tree conflict flag (6) (single letter: "C" or " ")
54 # - out-of-date flag (7) (single letter: "*" or " ")
58 # - working revision ('wc_rev') (either digits or "-", "?" or " ")
62 # - last-changed revision (either digits or "?" or " ")
66 # - last author (optional string of non-whitespace
71 # - path ('path') (string of characters until newline)
73 # Working revision, last-changed revision, and last author are whitespace
74 # only if the item is missing.
76 _re_parse_status
= re
.compile('^([?!MACDRUG_ ][MACDRUG_ ])'
83 '((?P<wc_rev>\d+|-|\?) +(\d|-|\?)+ +(\S+) +)?'
# Matches a "Skipped ..." notification line (the line must end with a
# newline).  group(1) is the text between the single quotes that close the
# line, which from_skipped() uses as the skipped path.
_re_parse_skipped = re.compile("^Skipped.* '(.+)'\n")
88 _re_parse_summarize
= re
.compile("^([MAD ][M ]) (.+)\n")
90 _re_parse_checkout
= re
.compile('^([RMAGCUDE_ ][MAGCUDE_ ])'
# Checkout/update notification lines of the form "<Verb>  '<path>'".
# group(1) is the verb and group(2) is the quoted path.  Raw strings are used
# so that "\s" reaches the regex engine verbatim instead of relying on
# Python passing an unrecognised string escape through unchanged.
#
# Accepts both "Restored" and "Skipped" lines.
_re_parse_co_skipped = re.compile(r"^(Restored|Skipped)\s+'(.+)'")
# Accepts "Restored" lines only.
_re_parse_co_restored = re.compile(r"^(Restored)\s+'(.+)'")
97 # Lines typically have a verb followed by whitespace then a path.
98 _re_parse_commit
= re
.compile('^(\w+( \(bin\))?)\s+(.+)')
102 """Describes an existing or expected state of a working copy.
104 The primary metaphor here is a dictionary of paths mapping to instances
105 of StateItem, which describe each item in a working copy.
107 Note: the paths should be *relative* to the root of the working copy.
110 def __init__(self
, wc_dir
, desc
):
111 "Create a State using the specified description."
112 assert isinstance(desc
, dict)
115 self
.desc
= desc
# dictionary: path -> StateItem
def add(self, more_desc):
  """Merge the items of MORE_DESC into this State's description.

  MORE_DESC must be a dictionary; every entry is stored in self.desc under
  its key, replacing any entry already present for that key.
  """
  assert isinstance(more_desc, dict)

  target = self.desc
  for key, entry in more_desc.items():
    target[key] = entry
123 def add_state(self
, parent
, state
):
124 "Import state items from a State object, reparent the items to PARENT."
125 assert isinstance(state
, State
)
127 if parent
and parent
[-1] != '/':
129 for path
, item
in state
.desc
.items():
131 self
.desc
[path
] = item
133 def remove(self
, *paths
):
134 "Remove a path from the state (the path must exist)."
136 del self
.desc
[to_relpath(path
)]
138 def copy(self
, new_root
=None):
139 """Make a deep copy of self. If NEW_ROOT is not None, then set the
    copy's wc_dir to NEW_ROOT instead of to self's wc_dir."""
142 for path
, item
in self
.desc
.items():
143 desc
[path
] = item
.copy()
145 new_root
= self
.wc_dir
146 return State(new_root
, desc
)
148 def tweak(self
, *args
, **kw
):
149 """Tweak the items' values.
151 Each argument in ARGS is the path of a StateItem that already exists in
152 this State. Each keyword argument in KW is a modifiable property of
155 The general form of this method is .tweak([paths...,] key=value...). If
156 one or more paths are provided, then those items' values are
157 modified. If no paths are given, then all items are modified.
162 path_ref
= self
.desc
[to_relpath(path
)]
164 e
.args
= ["Path '%s' not present in WC state descriptor" % path
]
168 for item
in self
.desc
.values():
171 def tweak_some(self
, filter, **kw
):
172 "Tweak the items for which the filter returns true."
173 for path
, item
in self
.desc
.items():
174 if list(filter(path
, item
)):
177 def subtree(self
, subtree_path
):
178 """Return a State object which is a deep copy of the sub-tree
179 identified by SUBTREE_PATH (which is assumed to contain only one
180 element rooted at the tree of this State object's WC_DIR)."""
182 for path
, item
in self
.desc
.items():
183 path_elements
= path
.split("/")
184 if len(path_elements
) > 1 and path_elements
[0] == subtree_path
:
185 desc
["/".join(path_elements
[1:])] = item
.copy()
186 return State(self
.wc_dir
, desc
)
188 def write_to_disk(self
, target_dir
):
189 """Construct a directory structure on disk, matching our state.
191 WARNING: any StateItem that does not have contents (.contents is None)
192 is assumed to be a directory.
194 if not os
.path
.exists(target_dir
):
195 os
.makedirs(target_dir
)
197 for path
, item
in self
.desc
.items():
198 fullpath
= os
.path
.join(target_dir
, path
)
199 if item
.contents
is None:
201 if not os
.path
.exists(fullpath
):
202 os
.makedirs(fullpath
)
206 # ensure its directory exists
207 dirpath
= os
.path
.dirname(fullpath
)
208 if not os
.path
.exists(dirpath
):
211 # write out the file contents now
212 open(fullpath
, 'wb').write(item
.contents
)
215 """Return a "normalized" version of self.
217 A normalized version has the following characteristics:
220 * paths use forward slashes
223 If self is already normalized, then it is returned. Otherwise, a
224 new State is constructed with (shallow) references to self's
227 If the caller needs a fully disjoint State, then use .copy() on
230 if self
.wc_dir
== '':
233 base
= to_relpath(os
.path
.normpath(self
.wc_dir
))
235 desc
= dict([(repos_join(base
, path
), item
)
236 for path
, item
in self
.desc
.items()])
237 return State('', desc
)
239 def compare(self
, other
):
240 """Compare this State against an OTHER State.
242 Three new set objects will be returned: CHANGED, UNIQUE_SELF, and
243 UNIQUE_OTHER. These contain paths of StateItems that are different
244 between SELF and OTHER, paths of items unique to SELF, and paths
    of items that are unique to OTHER, respectively.
247 assert isinstance(other
, State
)
249 norm_self
= self
.normalize()
250 norm_other
= other
.normalize()
252 # fast-path the easy case
253 if norm_self
== norm_other
:
257 paths_self
= set(norm_self
.desc
.keys())
258 paths_other
= set(norm_other
.desc
.keys())
260 for path
in paths_self
.intersection(paths_other
):
261 if norm_self
.desc
[path
] != norm_other
.desc
[path
]:
264 return changed
, paths_self
- paths_other
, paths_other
- paths_self
266 def compare_and_display(self
, label
, other
):
267 """Compare this State against an OTHER State, and display differences.
269 Information will be written to stdout, displaying any differences
270 between the two states. LABEL will be used in the display. SELF is the
271 "expected" state, and OTHER is the "actual" state.
    If any changes are detected/displayed, then SVNTreeUnequal is raised.
275 norm_self
= self
.normalize()
276 norm_other
= other
.normalize()
278 changed
, unique_self
, unique_other
= norm_self
.compare(norm_other
)
279 if not changed
and not unique_self
and not unique_other
:
282 # Use the shortest path as a way to find the "root-most" affected node.
283 def _shortest_path(path_set
):
285 for path
in path_set
:
286 if shortest
is None or len(path
) < len(shortest
):
291 path
= _shortest_path(changed
)
292 display_nodes(label
, path
, norm_self
.desc
[path
], norm_other
.desc
[path
])
294 path
= _shortest_path(unique_self
)
295 default_singleton_handler('actual ' + label
, path
, norm_self
.desc
[path
])
297 path
= _shortest_path(unique_other
)
298 default_singleton_handler('expected ' + label
, path
,
299 norm_other
.desc
[path
])
301 raise svntest
.tree
.SVNTreeUnequal
303 def tweak_for_entries_compare(self
):
304 for path
, item
in self
.desc
.copy().items():
306 # If this is an unversioned tree-conflict, remove it.
307 # These are only in their parents' THIS_DIR, they don't have entries.
308 if item
.status
[0] in '!?' and item
.treeconflict
== 'C':
311 # when reading the entry structures, we don't examine for text or
312 # property mods, so clear those flags. we also do not examine the
313 # filesystem, so we cannot detect missing files.
314 if item
.status
[0] in 'M!':
315 item
.status
= ' ' + item
.status
[1]
316 if item
.status
[1] == 'M':
317 item
.status
= item
.status
[0] + ' '
319 # we don't contact the repository, so our only information is what
320 # is in the working copy. 'K' means we have one and it matches the
321 # repos. 'O' means we don't have one but the repos says the item
322 # is locked by us, elsewhere. 'T' means we have one, and the repos
323 # has one, but it is now owned by somebody else. 'B' means we have
324 # one, but the repos does not.
326 # for each case of "we have one", set the writelocked state to 'K',
327 # and clear it to None for the others. this will match what is
328 # generated when we examine our working copy state.
329 if item
.writelocked
in 'TB':
330 item
.writelocked
= 'K'
331 elif item
.writelocked
== 'O':
332 item
.writelocked
= None
335 "Return an old-style tree (for compatibility purposes)."
337 for path
, item
in self
.desc
.items():
338 nodelist
.append(item
.as_node_tuple(os
.path
.join(self
.wc_dir
, path
)))
340 tree
= svntest
.tree
.build_generic_tree(nodelist
)
342 check
= tree
.as_state()
345 pprint
.pprint(self
.desc
)
346 pprint
.pprint(check
.desc
)
347 # STATE -> TREE -> STATE is lossy.
348 # In many cases, TREE -> STATE -> TREE is not.
349 # Even though our conversion from a TREE has lost some information, we
350 # may be able to verify that our lesser-STATE produces the same TREE.
351 svntest
.tree
.compare_trees('mismatch', tree
, check
.old_tree())
356 return str(self
.old_tree())
358 def __eq__(self
, other
):
359 if not isinstance(other
, State
):
361 norm_self
= self
.normalize()
362 norm_other
= other
.normalize()
363 return norm_self
.desc
== norm_other
.desc
def __ne__(self, other):
  """Return the logical negation of self.__eq__(OTHER)."""
  if self.__eq__(other):
    return False
  return True
369 def from_status(cls
, lines
):
370 """Create a State object from 'svn status' output."""
372 def not_space(value
):
373 if value
and value
!= ' ':
379 if line
.startswith('DBG:'):
382 # Quit when we hit an externals status announcement.
383 ### someday we can fix the externals tests to expect the additional
384 ### flood of externals status data.
385 if line
.startswith('Performing'):
388 match
= _re_parse_status
.search(line
)
389 if not match
or match
.group(10) == '-':
390 # ignore non-matching lines, or items that only exist on repos
393 item
= StateItem(status
=match
.group(1),
394 locked
=not_space(match
.group(2)),
395 copied
=not_space(match
.group(3)),
396 switched
=not_space(match
.group(4)),
397 writelocked
=not_space(match
.group(5)),
398 treeconflict
=not_space(match
.group(6)),
399 wc_rev
=not_space(match
.group('wc_rev')),
401 desc
[to_relpath(match
.group('path'))] = item
406 def from_skipped(cls
, lines
):
407 """Create a State object from 'Skipped' lines."""
411 if line
.startswith('DBG:'):
414 match
= _re_parse_skipped
.search(line
)
416 desc
[to_relpath(match
.group(1))] = StateItem()
421 def from_summarize(cls
, lines
):
422 """Create a State object from 'svn diff --summarize' lines."""
426 if line
.startswith('DBG:'):
429 match
= _re_parse_summarize
.search(line
)
431 desc
[to_relpath(match
.group(2))] = StateItem(status
=match
.group(1))
436 def from_checkout(cls
, lines
, include_skipped
=True):
437 """Create a State object from 'svn checkout' lines."""
440 re_extra
= _re_parse_co_skipped
442 re_extra
= _re_parse_co_restored
446 if line
.startswith('DBG:'):
449 match
= _re_parse_checkout
.search(line
)
451 if match
.group(3) == 'C':
455 desc
[to_relpath(match
.group(4))] = StateItem(status
=match
.group(1),
456 treeconflict
=treeconflict
)
458 match
= re_extra
.search(line
)
460 desc
[to_relpath(match
.group(2))] = StateItem(verb
=match
.group(1))
465 def from_commit(cls
, lines
):
466 """Create a State object from 'svn commit' lines."""
470 if line
.startswith('DBG:') or line
.startswith('Transmitting'):
473 match
= _re_parse_commit
.search(line
)
475 desc
[to_relpath(match
.group(3))] = StateItem(verb
=match
.group(1))
480 def from_wc(cls
, base
, load_props
=False, ignore_svn
=True):
481 """Create a State object from a working copy.
    Walks the tree at BASE, building a State based on the actual files
484 and directories found. If LOAD_PROPS is True, then the properties
485 will be loaded for all nodes (Very Expensive!). If IGNORE_SVN is
486 True, then the .svn subdirectories will be excluded from the State.
489 # we're going to walk the base, and the OS wants "."
493 dot_svn
= svntest
.main
.get_admin_name()
495 for dirpath
, dirs
, files
in os
.walk(base
):
496 parent
= path_to_key(dirpath
, base
)
497 if ignore_svn
and dot_svn
in dirs
:
499 for name
in dirs
+ files
:
500 node
= os
.path
.join(dirpath
, name
)
501 if os
.path
.isfile(node
):
502 contents
= open(node
, 'r').read()
505 desc
[repos_join(parent
, name
)] = StateItem(contents
=contents
)
508 paths
= [os
.path
.join(base
, to_ospath(p
)) for p
in desc
.keys()]
510 all_props
= svntest
.tree
.get_props(paths
)
511 for node
, props
in all_props
.items():
513 desc
['.'] = StateItem(props
=props
)
516 # 'svn proplist' strips './' from the paths. put it back on.
517 node
= os
.path
.join('.', node
)
518 desc
[path_to_key(node
, base
)].props
= props
523 def from_entries(cls
, base
):
524 """Create a State object from a working copy, via the old "entries" API.
    Walks the tree at BASE, building a State based on the information
527 provided by the old entries API, as accessed via the 'entries-dump'
531 # we're going to walk the base, and the OS wants "."
534 if os
.path
.isfile(base
):
535 # a few tests run status on a single file. quick-and-dirty this. we
536 # really should analyze the entry (similar to below) to be general.
537 dirpath
, basename
= os
.path
.split(base
)
538 entries
= svntest
.main
.run_entriesdump(dirpath
)
540 to_relpath(base
): StateItem
.from_entry(entries
[basename
]),
544 dot_svn
= svntest
.main
.get_admin_name()
546 for dirpath
, dirs
, files
in os
.walk(base
):
548 # don't visit the .svn subdir
551 # this is not a versioned directory. remove all subdirectories since
552 # we don't want to visit them. then skip this directory.
556 entries
= svntest
.main
.run_entriesdump(dirpath
)
560 elif dirpath
.startswith('.' + os
.sep
):
561 parent
= to_relpath(dirpath
[2:])
563 parent
= to_relpath(dirpath
)
565 parent_url
= entries
[''].url
567 for name
, entry
in entries
.items():
568 # if the entry is marked as DELETED *and* it is something other than
569 # schedule-add, then skip it. we can add a new node "over" where a
570 # DELETED node lives.
571 if entry
.deleted
and entry
.schedule
!= 1:
573 if name
and entry
.kind
== 2:
574 # stub subdirectory. leave a "missing" StateItem in here. note
575 # that we can't put the status as "! " because that gets tweaked
576 # out of our expected tree.
577 item
= StateItem(status
=' ', wc_rev
='?')
578 desc
[repos_join(parent
, name
)] = item
580 item
= StateItem
.from_entry(entry
)
582 desc
[repos_join(parent
, name
)] = item
583 implied_url
= repos_join(parent_url
, svn_url_quote(name
))
585 item
._url
= entry
.url
# attach URL to directory StateItems
588 grandpa
, this_name
= repos_split(parent
)
590 implied_url
= repos_join(desc
[grandpa
]._url
,
591 svn_url_quote(this_name
))
595 if implied_url
and implied_url
!= entry
.url
:
598 # only recurse into directories found in this entries. remove any
599 # which are not mentioned.
600 unmentioned
= set(dirs
) - set(entries
.keys())
601 for subdir
in unmentioned
:
608 """Describes an individual item within a working copy.
610 Note that the location of this item is not specified. An external
611 mechanism, such as the State class, will provide location information
615 def __init__(self
, contents
=None, props
=None,
616 status
=None, verb
=None, wc_rev
=None,
617 locked
=None, copied
=None, switched
=None, writelocked
=None,
619 # provide an empty prop dict if it wasn't provided
623 ### keep/make these ints one day?
624 if wc_rev
is not None:
627 # Any attribute can be None if not relevant, unless otherwise stated.
629 # A string of content (if the node is a file).
630 self
.contents
= contents
631 # A dictionary mapping prop name to prop value; never None.
633 # A two-character string from the first two columns of 'svn status'.
635 # The action word such as 'Adding' printed by commands like 'svn update'.
637 # The base revision number of the node in the WC, as a string.
639 # For the following attributes, the value is the status character of that
640 # field from 'svn status', except using value None instead of status ' '.
643 self
.switched
= switched
644 self
.writelocked
= writelocked
645 # Value 'C' or ' ', or None as an expected status meaning 'do not check'.
646 self
.treeconflict
= treeconflict
649 "Make a deep copy of self."
651 vars(new
).update(vars(self
))
652 new
.props
= self
.props
.copy()
655 def tweak(self
, **kw
):
656 for name
, value
in kw
.items():
657 # Refine the revision args (for now) to ensure they are strings.
658 if value
is not None and name
== 'wc_rev':
660 setattr(self
, name
, value
)
662 def __eq__(self
, other
):
663 if not isinstance(other
, StateItem
):
665 v_self
= dict([(k
, v
) for k
, v
in vars(self
).items()
666 if not k
.startswith('_')])
667 v_other
= dict([(k
, v
) for k
, v
in vars(other
).items()
668 if not k
.startswith('_')])
669 if self
.treeconflict
is None:
670 v_other
= v_other
.copy()
671 v_other
['treeconflict'] = None
672 if other
.treeconflict
is None:
673 v_self
= v_self
.copy()
674 v_self
['treeconflict'] = None
675 return v_self
== v_other
def __ne__(self, other):
  """Return True exactly when self.__eq__(OTHER) reports a false value."""
  same = self.__eq__(other)
  if same:
    return False
  return True
680 def as_node_tuple(self
, path
):
682 if self
.status
is not None:
683 atts
['status'] = self
.status
684 if self
.verb
is not None:
685 atts
['verb'] = self
.verb
686 if self
.wc_rev
is not None:
687 atts
['wc_rev'] = self
.wc_rev
688 if self
.locked
is not None:
689 atts
['locked'] = self
.locked
690 if self
.copied
is not None:
691 atts
['copied'] = self
.copied
692 if self
.switched
is not None:
693 atts
['switched'] = self
.switched
694 if self
.writelocked
is not None:
695 atts
['writelocked'] = self
.writelocked
696 if self
.treeconflict
is not None:
697 atts
['treeconflict'] = self
.treeconflict
699 return (os
.path
.normpath(path
), self
.contents
, self
.props
, atts
)
702 def from_entry(cls
, entry
):
704 if entry
.schedule
== 1: # svn_wc_schedule_add
706 elif entry
.schedule
== 2: # svn_wc_schedule_delete
708 elif entry
.schedule
== 3: # svn_wc_schedule_replace
710 elif entry
.conflict_old
:
711 ### I'm assuming we only need to check one, rather than all conflict_*
      ### is this sufficient? guessing here w/o investigation.
716 status
= status
[0] + 'C'
727 if entry
.revision
== -1:
730 wc_rev
= entry
.revision
733 ### figure out switched
741 return cls(status
=status
,
746 writelocked
=writelocked
,
751 to_relpath
= to_ospath
= lambda path
: path
753 def to_relpath(path
):
754 return path
.replace(os
.sep
, '/')
756 return path
.replace('/', os
.sep
)
759 def path_to_key(path
, base
):
763 if base
.endswith(os
.sep
) or base
.endswith('/') or base
.endswith(':'):
764 # Special path format on Windows:
765 # 'C:/' Is a valid root which includes its separator ('C:/file')
766 # 'C:' is a valid root which isn't followed by a separator ('C:file')
768 # In this case, we don't need a separator between the base and the path.
771 # Account for a separator between the base and the relpath we're creating
774 assert path
.startswith(base
), "'%s' is not a prefix of '%s'" % (base
, path
)
775 return to_relpath(path
[len(base
):])
778 def repos_split(repos_relpath
):
779 """Split a repos path into its directory and basename parts."""
780 idx
= repos_relpath
.rfind('/')
782 return '', repos_relpath
783 return repos_relpath
[:idx
], repos_relpath
[idx
+1:]
786 def repos_join(base
, path
):
787 """Join two repos paths. This generally works for URLs too."""
792 return base
+ '/' + path
def svn_url_quote(url):
  """Return URL percent-encoded using Subversion's set of "safe" characters.

  svn defines a different set of "safe" characters than Python does, so
  we need to avoid escaping them.  see subr/path.c:uri_char_validity[]
  """
  # quote() lives in urllib.parse on Python 3 and directly in urllib on
  # Python 2; resolve it locally so this helper works on either.
  try:
    from urllib.parse import quote
  except ImportError:
    from urllib import quote
  return quote(url, "!$&'()*+,-./:=@_~")
802 ### probably toss these at some point. or major rework. or something.
803 ### just bootstrapping some changes for now.
806 def item_to_node(path
, item
):
807 tree
= svntest
.tree
.build_generic_tree([item
.as_node_tuple(path
)])
809 assert len(tree
.children
) == 1
810 tree
= tree
.children
[0]
813 ### yanked from tree.compare_trees()
814 def display_nodes(label
, path
, expected
, actual
):
815 'Display two nodes, expected and actual.'
816 expected
= item_to_node(path
, expected
)
817 actual
= item_to_node(path
, actual
)
818 print("=============================================================")
819 print("Expected '%s' and actual '%s' in %s tree are different!"
820 % (expected
.name
, actual
.name
, label
))
821 print("=============================================================")
822 print("EXPECTED NODE TO BE:")
823 print("=============================================================")
825 print("=============================================================")
826 print("ACTUAL NODE FOUND:")
827 print("=============================================================")
830 ### yanked from tree.py
831 def default_singleton_handler(description
, path
, item
):
832 node
= item_to_node(path
, item
)
833 print("Couldn't find node '%s' in %s tree" % (node
.name
, description
))
835 raise svntest
.tree
.SVNTreeUnequal