Follow-up to r29036: Now that the "mergeinfo" transaction file is no
[svn.git] / contrib / server-side / fsfsverify.py
blobb01338b7eef078d05e40c5fc94a6318410ecffb0
1 #!/usr/bin/env python
2 # Copyright (c) 2006, 2007 by John Szakmeister <john at szakmeister dot net>
4 # This program is free software; you can redistribute it and/or modify
5 # it under the terms of the GNU General Public License as published by
6 # the Free Software Foundation; either version 2 of the License, or
7 # (at your option) any later version.
9 # This program is distributed in the hope that it will be useful,
10 # but WITHOUT ANY WARRANTY; without even the implied warranty of
11 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 # GNU General Public License for more details.
14 # You should have received a copy of the GNU General Public License
15 # along with this program; if not, write to the Free Software
16 # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
18 import os
19 import optparse
20 import sys
21 import re
class FsfsVerifyException(Exception):
    '''Root of the exception hierarchy used by this script.'''
class PotentiallyFixableException(FsfsVerifyException):
    '''Represents a class of problems that we may be able to fix.

    The offset passed to the constructor is stored in self.offset so that
    repair code can find the place where the problem was detected.'''

    def __init__(self, message, offset):
        super(PotentiallyFixableException, self).__init__(message)
        self.offset = offset
class InvalidInstruction(PotentiallyFixableException):
    '''An svndiff instruction byte carried an unknown instruction selector.'''
class InvalidCompressedStream(PotentiallyFixableException):
    '''A compressed instruction or data stream could not be set up.'''
class InvalidRepHeader(PotentiallyFixableException):
    '''A representation header is neither PLAIN nor DELTA.'''
class InvalidWindow(PotentiallyFixableException):
    '''An svndiff window header is unreadable or its lengths do not add up.'''
class InvalidSvndiffVersion(FsfsVerifyException):
    '''The svndiff version is something other than 0 or 1.'''
class InvalidSvndiffHeader(FsfsVerifyException):
    '''An svndiff stream does not begin with the 'SVN' signature.'''
class DataCorrupt(FsfsVerifyException):
    '''The revision file content does not match what was expected.'''
class NoMoreData(FsfsVerifyException):
    '''A byte was requested from a stream that has already been used up.'''
# Bit flags naming the categories of output that log() can emit.
LOG_INSTRUCTIONS = 1 << 0
LOG_WINDOWS = 1 << 1
LOG_SVNDIFF = 1 << 2

# Categories that are currently enabled; log() tests its first argument
# against this mask.
LOG_MASK = LOG_SVNDIFF
def log(type, indent, format, *args):
    '''Print a message when its category is enabled in LOG_MASK.

    type is one of the LOG_* bit flags, indent is the number of times the
    indent string is repeated in front of every output line, and format/args
    are combined with the % operator to build the message.'''
    if not (type & LOG_MASK):
        return

    prefix = ' ' * indent
    message = format % args
    # Indent every line of a multi-line message, not just the first.
    print('\n'.join([prefix + piece for piece in message.split('\n')]))
class ByteStream(object):
    '''Wrap a file object and add the byte-level helpers (readByte, advance,
    clone) that the svndiff parsing code relies on.'''

    def __init__(self, fileobj):
        self._f = fileobj

    def readByte(self):
        '''Read a single byte and return it as an integer.'''
        char = self._f.read(1)
        return ord(char)

    def tell(self):
        '''Return the current position of the wrapped file object.'''
        return self._f.tell()

    def advance(self, numBytes):
        '''Move numBytes forward from the current position.'''
        # whence=1 makes the seek relative to the current position.
        self._f.seek(numBytes, 1)

    def clone(self):
        '''Return a new ByteStream that can be read independently of
        this one.'''
        source = self._f
        if hasattr(source, 'clone'):
            duplicate = source.clone()
        else:
            # We expect the file object to map to a real file

            # Tried using dup(), but (at least on the mac), that ends up
            # creating 2 handles to the same underlying os file object,
            # instead of two independent file objects.  So, we resort to
            # an open call to create a new file object
            duplicate = open(source.name, 'rb')
            duplicate.seek(source.tell())
        return ByteStream(duplicate)

    # The following let ByteStream behave as a file within the
    # context of this script.

    def read(self, *args, **kwargs):
        return self._f.read(*args, **kwargs)

    def seek(self, *args, **kwargs):
        return self._f.seek(*args, **kwargs)
class ZlibByteStream(ByteStream):
    '''A byte stream over zlib-compressed data.

    The constructor reads length bytes from fileobj, inflates them in one go
    and then serves bytes from the inflated buffer.  tell() reports the
    position inside the inflated data, not inside the underlying file.'''

    def __init__(self, fileobj, length):
        import zlib

        self._f = fileobj

        # Number of inflated bytes handed out so far; doubles as the read
        # cursor into self._buffer.
        self._numBytesConsumed = 0

        self._startingOffset = self._f.tell()

        self._z = zlib.decompressobj(15)

        # A bytearray yields integers when indexed on both Python 2 and 3,
        # so readByte() needs no ord() call and no per-byte slicing.
        self._buffer = bytearray(self._z.decompress(self._f.read(length)))
        self._origBufferLength = len(self._buffer)

    def readByte(self):
        '''Return the next inflated byte as an integer.

        Raises NoMoreData once the inflated buffer is exhausted.'''
        if self._numBytesConsumed >= self._origBufferLength:
            raise NoMoreData("Unexpected end of data stream!")

        byte = self._buffer[self._numBytesConsumed]
        self._numBytesConsumed += 1
        return byte

    def tell(self):
        '''Return how many inflated bytes have been consumed.'''
        return self._numBytesConsumed

    def advance(self, numBytes):
        '''Skip numBytes inflated bytes.

        The previous implementation never decremented its loop counter, so
        any non-zero request consumed the whole buffer and then raised.
        Raises NoMoreData if fewer than numBytes bytes remain.'''
        if numBytes < 0:
            raise ValueError("Cannot advance by a negative number of bytes")

        remaining = self._origBufferLength - self._numBytesConsumed
        if numBytes > remaining:
            self._numBytesConsumed = self._origBufferLength
            raise NoMoreData("Unexpected end of data stream!")

        self._numBytesConsumed += numBytes

    def clone(self):
        '''Return a plain ByteStream over the underlying (still compressed)
        file, positioned where that file currently is.'''
        if hasattr(self._f, 'clone'):
            newFileObj = self._f.clone()
        else:
            newFileObj = open(self._f.name, 'rb')
            newFileObj.seek(self._f.tell())
        return ByteStream(newFileObj)

    # File-style access makes no sense on the inflated buffer, so the
    # file-like helpers inherited from ByteStream are disabled explicitly
    # (the old bare "raise" had no active exception to re-raise).

    def read(self, *args, **kwargs):
        raise NotImplementedError("read() is not supported on a ZlibByteStream")

    def seek(self, *args, **kwargs):
        raise NotImplementedError("seek() is not supported on a ZlibByteStream")
def getVarint(byteStream):
    '''Grabs a variable sized int from a bitstream (meaning this function
    doesn't seek).

    Each byte contributes its low 7 bits, most significant group first; a
    byte with the high bit clear terminates the number.  byteStream only
    needs a readByte() method that returns an integer.'''

    # A plain int is enough: Python integers grow as needed, so the
    # Python-2-only long() builtin is not required.
    value = 0
    while True:
        byte = byteStream.readByte()
        value = (value << 7) + (byte & 0x7F)
        if byte & 0x80 == 0:
            break
    return value
# Symbolic names for the three svndiff instruction kinds; SvndiffInstruction
# stores one of these in its "type" attribute.
INSTR_COPY_SOURCE = "copy-source"
INSTR_COPY_TARGET = "copy-target"
INSTR_COPY_DATA = "copy-data"
class SvndiffInstruction(object):
    '''A single svndiff instruction decoded from a byte stream.

    The top two bits of the first byte select the instruction kind and the
    low six bits hold the length; a zero length means the real length follows
    as a varint.  Copy-source and copy-target instructions carry an additional
    varint offset.'''

    def __init__(self, byteStream):
        self.instrOffset = byteStream.tell()

        selector = byteStream.readByte()

        opcode = (selector >> 6) & 3
        length = selector & 0x3F

        if opcode == 3:
            raise InvalidInstruction(
                "Invalid instruction found at offset %d (%02X)" % (
                    self.instrOffset, selector),
                self.instrOffset)

        kinds = (INSTR_COPY_SOURCE, INSTR_COPY_TARGET, INSTR_COPY_DATA)
        self.type = kinds[opcode]

        if length == 0:
            # Length is coded as a varint following the current byte
            length = getVarint(byteStream)

        self.length = length

        isSource = self.type == INSTR_COPY_SOURCE
        isTarget = self.type == INSTR_COPY_TARGET
        isData = self.type == INSTR_COPY_DATA

        # Only the two copy-from-a-view instructions carry an offset.
        if isSource or isTarget:
            self.offset = getVarint(byteStream)

        self.sourceOffset = self.offset if isSource else 0
        self.targetOffset = self.offset if isTarget else 0

        # Determine the number of bytes consumed in the source stream, target
        # stream, and the data stream
        self.sourceLength = self.length if isSource else 0
        self.targetLength = self.length if isTarget else 0
        self.dataLength = self.length if isData else 0

        self.instrLength = byteStream.tell() - self.instrOffset

    def __repr__(self):
        fields = (self.type, self.sourceOffset, self.sourceLength,
                  self.targetOffset, self.targetLength, self.dataLength,
                  self.instrOffset, self.instrLength)
        return ('<SvndiffInstruction %s so:%d sl:%d to: %d tl:%d dl:%d (%d, %d)>'
                % fields)
class Window(object):
    '''One svndiff window: the header fields plus the instruction and data
    streams that follow it.

    The constructor parses the header and prepares the two streams (wrapping
    them in ZlibByteStream for svndiff1 when they are compressed); verify()
    then walks the instructions and cross-checks the lengths.

    Raises InvalidSvndiffVersion for an unknown version, InvalidWindow for an
    unreadable header and InvalidCompressedStream when a compressed stream
    cannot be set up.  The latter two carry a windowOffset attribute.'''

    def __init__(self, byteStream, svndiffVersion):
        if svndiffVersion not in [0, 1]:
            raise InvalidSvndiffVersion(
                "Invalid svndiff version %d" % svndiffVersion)

        # Record the initial offset of the window
        self.windowOffset = byteStream.tell()

        # "except Exception" (rather than a bare except) keeps
        # KeyboardInterrupt and SystemExit from being reported as corruption.
        try:
            self.sourceOffset = getVarint(byteStream)
            self.sourceLength = getVarint(byteStream)
            self.targetLength = getVarint(byteStream)
            self.instrLength = getVarint(byteStream)
            self.dataLength = getVarint(byteStream)
            self.windowHeaderLength = byteStream.tell() - self.windowOffset
            self.windowLength = \
                self.windowHeaderLength + self.instrLength + self.dataLength

            # Store the byte stream, and clone it for use as a data stream.
            self.instrByteStream = byteStream
            self.dataByteStream = byteStream.clone()

            # Advance the data stream past the instructions to the start of
            # the data.
            self.dataByteStream.advance(self.instrLength)
        except Exception:
            raise self._invalidWindow(
                "The window header at offset %d appears to be corrupted" %
                (self.windowOffset))

        # In svndiff1, the instruction area starts with a varint-encoded
        # length.  If this length matches the one encoded in the header, then
        # there is no compression.  If it differs, then the stream is
        # compressed with zlib.

        self.origInstrStream = self.instrByteStream
        self.origDataStream = self.dataByteStream
        self.isInstrCompressed = False
        self.isDataCompressed = False
        self.compressedInstrLength = self.instrLength
        self.compressedDataLength = self.dataLength

        if svndiffVersion == 1:
            try:
                offset = self.instrByteStream.tell()
                encodedInstrLength = getVarint(self.instrByteStream)
                instrIntSize = self.instrByteStream.tell() - offset

                offset = self.dataByteStream.tell()
                encodedDataLength = getVarint(self.dataByteStream)
                dataIntSize = self.dataByteStream.tell() - offset

                self.instrLength = encodedInstrLength
                self.dataLength = encodedDataLength
            except Exception:
                raise self._invalidWindow(
                    "The window header at offset %d appears to be corrupted" %
                    (self.windowOffset))

            # Now, we need to make a determination about whether the data and
            # instructions are compressed.  If they are, we need to zlib
            # decompress them.  We do that by creating another stream and that
            # will decompress the data on the fly.
            try:
                offset = self.instrByteStream.tell()
                if self.compressedInstrLength - instrIntSize != self.instrLength:
                    self.origInstrStream = self.instrByteStream
                    self.instrByteStream = ZlibByteStream(
                        self.origInstrStream, self.compressedInstrLength)
                    self.isInstrCompressed = True
            except Exception as e:
                new_e = InvalidCompressedStream(
                    "Invalid compressed instr stream at offset %d (%s)" % (
                        offset, str(e)),
                    offset)
                new_e.windowOffset = self.windowOffset
                raise new_e

            try:
                offset = self.dataByteStream.tell()
                if self.compressedDataLength - dataIntSize != self.dataLength:
                    self.origDataStream = self.dataByteStream
                    self.dataByteStream = ZlibByteStream(
                        self.origDataStream, self.compressedDataLength)
                    self.isDataCompressed = True
            except Exception as e:
                new_e = InvalidCompressedStream(
                    "Invalid compressed data stream at offset %d (%s)" % (
                        offset, str(e)),
                    offset)
                new_e.windowOffset = self.windowOffset
                raise new_e

    def _invalidWindow(self, message):
        '''Build an InvalidWindow for this window, tagged with its offset.'''
        e = InvalidWindow(message, self.windowOffset)
        e.windowOffset = self.windowOffset
        return e

    def verify(self):
        '''Decode every instruction and check the instruction, data and
        target byte counts against the window header.

        Raises InvalidWindow on a mismatch; PotentiallyFixableException
        instances raised while decoding an instruction are tagged with this
        window and its offset before being re-raised.'''
        expectedInstrLength = self.instrLength
        expectedDataLength = self.dataLength
        expectedTargetLength = self.targetLength
        expectedSourceLength = self.sourceLength

        computedInstrLength = 0
        computedDataLength = 0
        computedTargetLength = 0
        computedSourceLength = 0

        if expectedInstrLength == 0:
            raise self._invalidWindow(
                "Corrupt window (at offset %d) has 0 instructions?!" %
                self.windowOffset)

        while computedInstrLength < expectedInstrLength:
            try:
                instr = SvndiffInstruction(self.instrByteStream)
            except PotentiallyFixableException as e:
                e.window = self
                e.windowOffset = self.windowOffset
                raise

            log(LOG_INSTRUCTIONS, 4, repr(instr))

            computedInstrLength += instr.instrLength
            computedDataLength += instr.dataLength
            computedSourceLength += instr.sourceLength
            computedTargetLength += \
                instr.targetLength + instr.sourceLength + instr.dataLength

        if computedInstrLength != expectedInstrLength:
            raise self._invalidWindow(
                "The number of instruction bytes consumed (%d) doesn't match the expected number (%d)" %
                (computedInstrLength, expectedInstrLength))

        if computedDataLength != expectedDataLength:
            raise self._invalidWindow(
                "The number of data bytes consumed (%d) doesn't match the expected number (%d)" %
                (computedDataLength, expectedDataLength))

        if computedTargetLength != expectedTargetLength:
            raise self._invalidWindow(
                "The number of target bytes consumed (%d) doesn't match the expected number (%d)" %
                (computedTargetLength, expectedTargetLength))

        # It appears that the source length specified in the window, isn't
        # exactly equal to what gets consumed.  I suspect that's because the
        # algorithm is using different offsets within the window, and one
        # offset/length pair will reach the end of the window.  However, this
        # hasn't shown to be a clear indicator of corruption, so
        # computedSourceLength is deliberately not compared against
        # expectedSourceLength.

        # Advance past the data.  We do this using seek because we might have
        # read a few bytes from the stream if it potentially had compressed
        # data
        self.origInstrStream.seek(self.windowOffset + self.windowLength)

    def __repr__(self):
        if hasattr(self, 'compressedInstrLength'):
            compressed = 'cil: %d cdl: %d ' % (self.compressedInstrLength,
                                               self.compressedDataLength)
        else:
            compressed = ''

        return "<Window wo:%d so:%d sl:%d tl:%d %sil:%d dl:%d whl:%d wl:%d>" % (
            self.windowOffset, self.sourceOffset, self.sourceLength,
            self.targetLength, compressed, self.instrLength, self.dataLength,
            self.windowHeaderLength, self.windowLength)
class Svndiff(object):
    '''An svndiff stream starting at the current position of fileobj.

    The constructor reads and validates the 4-byte header ('SVN' followed by
    a version byte); length is the size of the whole stream, header included.
    verify() then parses and verifies the windows that follow.'''

    def __init__(self, fileobj, length):
        self._f = fileobj
        self.startingOffset = self._f.tell()

        header = self._f.read(4)
        if len(header) != 4:
            raise EOFError(
                "Unexpected end of file while reading svndiff header at offset %d" %
                (self._f.tell()))

        if header[0:3] != 'SVN':
            raise InvalidSvndiffHeader(
                "Invalid svndiff header at offset %d" % (self.startingOffset))

        self.version = ord(header[3])
        if self.version not in [0, 1]:
            raise InvalidSvndiffVersion(
                "Invalid svndiff version %d" % self.version)

        # Bytes of window data that follow the 4-byte header.
        self._length = length - 4

    def verify(self):
        '''Verify every window in the stream.

        PotentiallyFixableException instances are tagged with svndiffStart
        (the offset of this stream) before being re-raised.'''
        self._f.seek(self.startingOffset + 4)

        bs = ByteStream(self._f)

        log(LOG_SVNDIFF, 2, "<Svndiff so: %d ver: %d>", self.startingOffset,
            self.version)

        try:
            remaining = self._length
            while remaining > 0:
                w = Window(bs, self.version)
                log(LOG_WINDOWS, 3, repr(w))
                w.verify()
                remaining -= w.windowLength
        except PotentiallyFixableException as e:
            e.svndiffStart = self.startingOffset
            raise
def getDirHash(f):
    '''Parse a PLAIN directory representation from the current position of f.

    Returns a dict mapping each entry name to a two-element list
    [NodeType, NodeId].  Raises ValueError if the representation does not
    start with a PLAIN line.'''
    header = f.readline()
    if header != 'PLAIN\n':
        raise ValueError("Expected a PLAIN representation (%d)" % f.tell())

    entries = {}

    while True:
        keyHeader = f.readline()[:-1]
        if keyHeader == 'END':
            break

        # "K <length>" announces the key, which sits on the next line.
        assert(keyHeader[0] == 'K')
        keyLength = int(keyHeader.split(' ')[1])
        name = f.readline()[:keyLength]

        # "V <length>" announces the value, which sits on the next line.
        valueHeader = f.readline()[:-1]
        assert(valueHeader[0] == 'V')
        valueLength = int(valueHeader.split(' ')[1])
        value = f.readline()[:valueLength]

        (kind, nodeid) = value.split(' ')
        entries[name] = [NodeType(kind), NodeId(nodeid)]

    return entries
class Rep(object):
    '''A text or property representation referenced from a node revision.

    rev, offset, length and size are converted to integers; contentType is
    the 5-character header read from the rev file ('PLAIN' or 'DELTA'), or
    None when the representation lives in a different revision.'''

    def __init__(self, type, rev, offset, length, size, digest,
                 contentType, currentRev, noderev):
        self.type = type
        self.rev = int(rev)
        self.offset = int(offset)
        self.length = int(length)
        self.size = int(size)
        self.digest = digest
        self.currentRev = currentRev

        self.contentType = contentType
        self.noderev = noderev

    def __repr__(self):
        if not self.contentType:
            contentType = 'UNKNOWN'
        elif self.contentType not in ['PLAIN', 'DELTA', None]:
            contentType = 'INVALID'
        else:
            contentType = self.contentType
        return '%s: %s %d %d %d %d %s' % (self.type, contentType, self.rev,
                                          self.offset, self.length, self.size,
                                          self.digest)

    def verify(self, f, dumpInstructions, dumpWindows):
        '''Verify this representation against the rev file f.

        DELTA representations are parsed as svndiff streams; PLAIN ones have
        their MD5 digest and trailing ENDREP checked.  dumpInstructions and
        dumpWindows are accepted for interface compatibility and not used.

        Raises InvalidRepHeader, FsfsVerifyException or DataCorrupt on
        problems; exceptions raised while verifying a DELTA stream get rep
        and noderev attributes attached.'''
        if self.contentType not in ['PLAIN', 'DELTA', None]:
            e = InvalidRepHeader("Invalid rep header found at %d (%s)!" %
                                 (self.offset, self.contentType),
                                 self.offset)
            e.rep = self
            e.noderev = self.noderev
            raise e

        # Compare against the revision this Rep was created for rather than
        # a module-level global that only exists when run as a script.
        if self.rev != self.currentRev:
            sys.stderr.write(
                "Skipping text rep since it isn't present in the current rev\n")
            return

        f.seek(self.offset)
        header = f.read(5)
        if header != self.contentType:
            raise FsfsVerifyException(
                "Invalid rep header found at %d (%s, %s)!" % (
                    self.offset, header, self.contentType))

        if header == 'DELTA':
            # Consume the rest of the DELTA header
            while f.read(1) != '\n':
                pass

            # This should be the start of the svndiff stream
            try:
                svndiff = Svndiff(f, self.length)
                svndiff.verify()
            except Exception as e:
                e.rep = self
                e.noderev = self.noderev
                raise
        else:
            if f.read(1) != '\n':
                raise DataCorrupt("Expected a '\\n' after PLAIN")

            # hashlib replaces the deprecated md5 module.
            import hashlib
            m = hashlib.md5()
            m.update(f.read(self.length))

            if self.digest and self.digest != m.hexdigest():
                raise DataCorrupt(
                    "PLAIN data is corrupted. Expected digest '%s', computed '%s'." % (
                        self.digest, m.hexdigest()))

            # NOTE(review): the ENDREP check is kept in the PLAIN branch;
            # confirm against the original indentation.
            if f.read(7) != 'ENDREP\n':
                raise DataCorrupt("Terminating ENDREP missing!")
class TextRep(Rep):
    '''A Rep whose type is fixed to 'text'.'''

    def __init__(self, rev, offset, length, size, digest,
                 contentType, currentRev, noderev):
        Rep.__init__(self, 'text', rev, offset, length, size,
                     digest, contentType, currentRev, noderev)
class PropRep(Rep):
    '''A Rep whose type is fixed to 'prop'.'''

    def __init__(self, rev, offset, length, size, digest,
                 contentType, currentRev, noderev):
        Rep.__init__(self, 'prop', rev, offset, length, size,
                     digest, contentType, currentRev, noderev)
class NodeId(object):
    '''A node revision id of the form "<txn_name>/<offset>".

    The revision number is taken from the third dot-separated component of
    txn_name with its first character dropped (e.g. "0.0.r5" gives 5).
    Instances compare equal to their string form.'''

    def __init__(self, nodeid):
        (self.txn_name, offset) = nodeid.split('/')
        self.offset = int(offset)
        self.rev = int(self.txn_name.split('.')[2][1:])

    def __repr__(self):
        return self.txn_name + '/%d' % self.offset

    def __eq__(self, other):
        return bool(repr(self) == other)

    def __ne__(self, other):
        # Python 2 does not derive != from ==, so define it explicitly.
        return not self.__eq__(other)

    def __hash__(self):
        # Keep hashing consistent with equality against the string form.
        return hash(repr(self))
class NodeType(object):
    '''The kind of a node: either 'file' or 'dir'.

    Any other value makes the constructor raise ValueError.'''

    def __init__(self, t):
        if t not in ('file', 'dir'):
            raise ValueError('Invalid Node type received: "%s"' % t)
        self.type = t

    def __repr__(self):
        return self.type[:]
class NodeRev(object):
    '''A node revision record parsed from the current position of f.

    The record is a run of "field: value" lines ending with an empty line.
    For directories whose text rep lives in the node's own revision, the
    directory entries are loaded into self.dir via getDirHash().'''

    def __init__(self, f, currentRev):
        self.pred = None
        self.text = None
        self.props = None
        self.cpath = None
        self.copyroot = None
        self.copyfrom = None
        self.dir = []

        self.nodeOffset = f.tell()

        while True:
            line = f.readline()
            if line == '':
                raise IOError("Unexpected end of file")
            if line == '\n':
                break

            # break apart the line
            try:
                (field, value) = line.split(':', 1)
            except:
                # Show where parsing went wrong, then let the error through.
                print(repr(line))
                print(self.nodeOffset)
                print(f.tell())
                raise

            # pull off the leading space and trailing new line
            value = value[1:-1]

            if field == 'id':
                self.id = NodeId(value)
            elif field == 'type':
                self.type = NodeType(value)
            elif field == 'pred':
                self.pred = NodeId(value)
            elif field == 'text':
                self.text = self._buildRep(TextRep, value, f, currentRev)
            elif field == 'props':
                self.props = self._buildRep(PropRep, value, f, currentRev)
            elif field in ('cpath', 'copyroot', 'copyfrom'):
                setattr(self, field, value)

        if self.type.type == 'dir':
            if self.text:
                if self.id.rev == self.text.rev:
                    resumeAt = f.tell()
                    f.seek(self.text.offset)
                    self.dir = getDirHash(f)
                    f.seek(resumeAt)
                else:
                    # The directory entries are stored in another file.
                    print("Warning: dir entries are stored in rev %d for noderev %s" % (
                        self.text.rev, repr(self.id)))

    def _buildRep(self, repClass, value, f, currentRev):
        '''Turn a "rev offset length size digest" value into a repClass
        instance, peeking at the rep header in f when the rep belongs to
        currentRev.  The position of f is restored afterwards.'''
        (rev, offset, length, size, digest) = value.split(' ')
        rev = int(rev)
        offset = int(offset)
        length = int(length)
        size = int(size)

        if rev != currentRev:
            contentType = None
        else:
            savedOffset = f.tell()
            f.seek(offset)
            contentType = f.read(5)
            f.seek(savedOffset)

        return repClass(rev, offset, length, size, digest,
                        contentType, currentRev, self)

    def __repr__(self):
        parts = ['NodeRev Id: %s\n type: %s\n' % (repr(self.id),
                                                  repr(self.type))]
        if self.pred:
            parts.append(' pred: %s\n' % repr(self.pred))
        if self.text:
            parts.append(' %s\n' % repr(self.text))
        if self.props:
            parts.append(' %s\n' % repr(self.props))
        if self.cpath:
            parts.append(' cpath: %s\n' % self.cpath)
        if self.copyroot:
            parts.append(' copyroot: %s\n' % self.copyroot)
        if self.copyfrom:
            parts.append(' copyfrom: %s\n' % self.copyfrom)
        if self.dir:
            parts.append(' dir contents:\n')
            for k in self.dir:
                parts.append(' %s: %s\n' % (k, self.dir[k]))
        # Drop the final newline.
        return ''.join(parts)[:-1]
class ChangedPaths(object):
    '''The changed-path entries parsed from the current position of f.

    Each entry is a change line ("<id> <action> <text-mod> <prop-mod> <path>")
    followed by a copyfrom line that is either empty or "<rev> <path>".
    Parsing stops at an empty line or end of file.  Iterating yields
    (path, (id, action, textMod, propMod, copyfromRev, copyfromPath)) pairs.

    Raises DataCorrupt when a change line has fewer than four fields.'''

    def __init__(self, f):
        self.changedPaths = {}

        # Read from the file object that was passed in, not from a
        # module-level global.
        while True:
            currentOffset = f.tell()
            action = f.readline()
            if action == '\n' or action == '':
                break

            path = action[:-1]
            try:
                (nodeId, action, textMod, propMod) = action[:-1].split(' ')[:4]
            except ValueError:
                raise DataCorrupt(
                    "Data appears to be corrupt at offset %d" % currentOffset)
            # The path is whatever follows the four leading fields.
            path = path[len(' '.join([nodeId, action, textMod, propMod]))+1:]

            line = f.readline()
            if line != '\n':
                # Split once only, so copyfrom paths may contain spaces.
                # The path keeps its trailing newline, as before.
                (copyfromRev, copyfromPath) = line.split(' ', 1)
            else:
                copyfromRev = -1
                copyfromPath = ''

            self.changedPaths[path] = (nodeId, action, textMod, propMod,
                                       copyfromRev, copyfromPath)

    def __iter__(self):
        return iter(self.changedPaths.items())
def getRootAndChangedPaths(revFile):
    '''Return the two integers stored on the last line of revFile as a
    (rootNode, changedPaths) pair.

    The file is scanned backwards one byte at a time, starting just before
    its final byte, until the newline that precedes the last line is found.'''
    distanceFromEnd = 2
    while True:
        revFile.seek(-distanceFromEnd, 2)
        if revFile.read(1) == '\n':
            # The file is now positioned at the start of the last line.
            break
        distanceFromEnd = distanceFromEnd + 1

    trailer = revFile.readline()
    (rootNode, changedPaths) = [int(field) for field in trailer.split(' ')]

    return (rootNode, changedPaths)
def dumpChangedPaths(changedPaths):
    '''Print every changed path with its action, modification flags and,
    when present, its copyfrom information.

    changedPaths yields (path, (id, action, textMod, propMod, copyfromRev,
    copyfromPath)) pairs; a copyfromRev of -1 means "no copyfrom".'''
    print("Changed Path Information:")
    for (path, details) in changedPaths:
        (id, action, textMod, propMod, copyfromRev, copyfromPath) = details
        print(" %s:" % path)
        print(" action: %s" % action)
        print(" text mod: %s" % textMod)
        print(" prop mod: %s" % propMod)
        if copyfromRev != -1:
            print("copyfrom path: %s" % copyfromPath)
            print("copyfrom rev: %s" % copyfromRev)
        # Blank line between entries.
        print("")
class WalkStrategy(object):
    '''Base class for the ways of enumerating node revisions in a rev file.

    Subclasses implement _nodeWalker() as a generator of NodeRev objects;
    iterating an instance rewinds the file to rootOffset first.'''

    def __init__(self, filename, rootOffset, currentRev):
        self.f = open(filename, 'rb')
        self.rootOffset = rootOffset
        self.currentRev = currentRev
        self.f.seek(rootOffset)

    def _nodeWalker(self):
        raise NotImplementedError("_nodeWalker is not implemented")

    def __iter__(self):
        self.f.seek(self.rootOffset)
        return self._nodeWalker()
class ClassicStrategy(WalkStrategy):
    '''Walk the tree from the root node revision, descending into the
    directory entries that belong to the same revision as their parent.'''

    def _nodeWalker(self):
        node = NodeRev(self.f, self.currentRev)
        yield node

        if node.type.type != 'dir':
            return

        for name in node.dir:
            child = node.dir[name][1]
            if child.rev == node.id.rev:
                self.f.seek(child.offset)
                for descendant in self._nodeWalker():
                    yield descendant
class RegexpStrategy(WalkStrategy):
    '''Find node revisions by scanning the whole file for "id: ..." lines
    instead of following directory entries.'''

    def __init__(self, filename, rootOffset, currentRev):
        WalkStrategy.__init__(self, filename, rootOffset, currentRev)

        # File object passed to the NodeRev() constructor so that it
        # doesn't interfere with our regex search.
        self.nodeFile = open(filename, 'rb')

    def _nodeWalker(self):
        nodeId_re = re.compile(r'^id: [a-z0-9\./]+$')

        self.f.seek(0)
        lineStart = 0

        for line in self.f:
            if nodeId_re.search(line):
                # Parse the node revision through the second file handle,
                # starting at the beginning of the matching line.
                self.nodeFile.seek(lineStart)
                yield NodeRev(self.nodeFile, self.currentRev)

            lineStart = lineStart + len(line)
def verify(noderev, revFile, dumpInstructions, dumpWindows):
    '''Print noderev and verify its text rep and, when it belongs to the
    current revision, its property rep.'''
    print(noderev)

    textRep = noderev.text
    if textRep:
        textRep.verify(revFile, dumpInstructions, dumpWindows)

    propRep = noderev.props
    if propRep and propRep.rev == propRep.currentRev:
        propRep.verify(revFile, dumpInstructions, dumpWindows)

    print("")
def truncate(noderev, revFile):
    '''Replace the text rep of noderev with an empty PLAIN rep, patch the
    "text:" line of the node revision to match, and exit the program.

    revFile must be open for reading and writing.  The rewritten "text:"
    line has exactly the same length as the original one.'''
    print("Truncating node %s (%s)" % (noderev.id, noderev.cpath))

    # Fix the text rep contents
    revFile.seek(noderev.text.offset, 0)
    revFile.write("PLAIN\x0aENDREP\x0a")

    # Fix the node rev: find the start of its "text" line
    revFile.seek(noderev.nodeOffset, 0)
    while True:
        lineStart = revFile.tell()
        if revFile.readline()[:4] == 'text':
            revFile.seek(lineStart, 0)
            break

    line = revFile.readline()
    revFile.seek(lineStart, 0)
    fields = line.split(' ')
    overallLength = len(line)

    # Zero the length and size while keeping their width, and substitute
    # the MD5 digest of empty content.
    fields[3] = '0' * len(fields[3])
    fields[4] = '0' * len(fields[4])
    fields[5] = 'd41d8cd98f00b204e9800998ecf8427e'
    newTextRep = ' '.join(fields) + '\x0a'
    assert(len(newTextRep) == overallLength)
    revFile.write(newTextRep)
    print("Done.")
    sys.exit(0)
def fixHeader(e, revFile):
    '''Attempt to fix the rep header. e is expected to be of type
    InvalidRepHeader, since the exception stores the necessary information
    to help repair the file.

    The last rep header line found before e.offset is taken to be the
    original start of the rep; that header plus the following 4096 bytes are
    copied over the data at e.offset.  revFile must be open for reading and
    writing.  Raises FsfsVerifyException if no rep header precedes e.offset.'''

    # First, we need to locate the real start of the text rep
    textrep_re = re.compile(r'^(DELTA( \d+ \d+ \d+)?|PLAIN)$')

    revFile.seek(0)
    offset = 0
    originalOffset = 0
    headerLen = None
    for line in revFile:
        if textrep_re.match(line):
            if offset >= originalOffset and offset < e.offset:
                originalOffset = offset
                headerLen = len(line)
        offset = offset + len(line)

    if headerLen is None:
        # Without a header there is nothing to copy from; fail clearly
        # instead of hitting an unbound variable further down.
        raise FsfsVerifyException(
            "Could not find a rep header before offset %d" % e.offset)

    print("Original text rep located at %d" % originalOffset)

    # Okay, now we have the original offset of the text rep that was
    # in the process of being written out.  The header portion of the
    # text rep has a fsync() done after it, so the 4K blocks actually
    # start after the header.  We need to make sure to copy the header
    # and the next 4K, to be on the safe side.
    copyLen = 4096 + headerLen

    revFile.seek(originalOffset)
    block = revFile.read(copyLen)
    print("Copy %d bytes from offset %d" % (copyLen, originalOffset))

    print("Write %d bytes at offset %d" % (copyLen, e.offset))
    revFile.seek(e.offset)
    revFile.write(block)
    revFile.flush()

    print("Fixed? :-) Re-run fsfsverify without the -f option")
def fixStream(e, revFile):
    '''Attempt to repair a corrupted svndiff stream.

    e must carry svndiffStart and windowOffset attributes.  Everything from
    the failing window up to and including the line ending in ENDREP is moved
    back to the start of the 4096-byte block (counted from the start of the
    svndiff stream) that contains the failing window.  revFile must be open
    for reading and writing and is closed afterwards.

    Raises DataCorrupt if no ENDREP line follows the failing window.'''
    startOffset = e.svndiffStart
    errorOffset = e.windowOffset

    repeatedBlockOffset = errorOffset - ((errorOffset - startOffset) % 4096)

    # Now we need to move up the rest of the rep

    # Determine the final offset by finding the end of the rep.
    revFile.seek(errorOffset)

    endrep_re = re.compile(".*ENDREP$")
    srcLength = 0
    foundEnd = False
    for l in revFile:
        srcLength += len(l)
        if endrep_re.match(l):
            foundEnd = True
            break

    if not foundEnd:
        # String exceptions are not valid; raise a real exception instead.
        raise DataCorrupt("Couldn't find end of rep!")

    finalOffset = errorOffset + srcLength
    srcOffset = errorOffset
    destOffset = repeatedBlockOffset

    print("Copy %d bytes from offset %d" % (srcLength, srcOffset))
    print("Write %d bytes at offset %d" % (srcLength, destOffset))

    # Copy in chunks of at most 64K.  The destination is never after the
    # source, so a front-to-back copy cannot overwrite unread data.
    while srcOffset < finalOffset:
        blen = min(64*1024, finalOffset - srcOffset)
        revFile.seek(srcOffset)
        block = revFile.read(blen)
        revFile.seek(destOffset)
        revFile.write(block)

        srcOffset += blen
        destOffset += blen

    revFile.flush()
    revFile.close()

    print("Fixed? :-) Re-run fsfsverify without the -f option")
def checkOptions(options):
    '''Validate the combination of command line options.

    Exits with status 1 when more than one of -c, -f and -t is given.  When
    -c or --no-verify is combined with -w/-i, a warning is printed and the
    -w/-i flags are cleared, so that the "Dropping" message is accurate.'''
    exclusive = ('dumpChanged', 'truncate', 'fixRlle')
    count = len([k for k in exclusive if options.__dict__.get(k)])

    if count > 1:
        sys.stderr.write("Please use only one of -c, -f, and -t.\n")
        sys.exit(1)

    if options.dumpChanged and (options.dumpWindows or options.dumpInstructions):
        sys.stderr.write(
            "-c is incompatible with -w and -i. Dropping -w and/or -i.\n")
        options.dumpWindows = False
        options.dumpInstructions = False

    if options.noVerify and (options.dumpWindows or options.dumpInstructions):
        sys.stderr.write(
            "--no-verify is incompatible with -w and -i. Dropping -w and/or -i.\n")
        options.dumpWindows = False
        options.dumpInstructions = False
def handleError(error, withTraceback=False):
    '''Report error on stderr, suggest re-running with -f, and exit with
    status 1.  When withTraceback is true, the traceback of the exception
    currently being handled is printed first.'''
    print("")
    if withTraceback:
        import traceback
        traceback.print_exc()

    # Format the exception that was passed in, not whatever a module-level
    # "e" happens to refer to.
    sys.stderr.write("Error %s: %s\n" % (error.__class__.__name__, str(error)))
    sys.stderr.write("Try running with -f to fix the revision\n")
    sys.exit(1)
# Command line entry point: parse the options, open the rev file, then dump
# the changed paths, truncate a node rev, or walk and verify every node rev.
if __name__ == '__main__':
    from optparse import OptionParser

    parser = OptionParser("usage: %prog [-w | -i | -r | -n] REV-FILE")
    parser.add_option("-c", "--changed-paths",
                      action="store_true", dest="dumpChanged",
                      help="Dump changed path information", default=False)
    parser.add_option("", "--no-verify",
                      action="store_true", dest="noVerify",
                      help="Don't parse svndiff streams.", default=False)
    parser.add_option("-i", "--instructions",
                      action="store_true", dest="dumpInstructions",
                      help="Dump instructions (implies -w)", default=False)
    parser.add_option("-w", "--windows",
                      action="store_true", dest="dumpWindows",
                      help="Dump windows", default=False)
    parser.add_option("-n", "--noderev-regexp",
                      action="store_true", dest="noderevRegexp",
                      help="Find all noderevs using a regexp", default=False)
    parser.add_option("-f", "--fix-read-length-line-error",
                      action="store_true", dest="fixRlle",
                      help="Attempt to fix the read length line error",
                      default=False)
    parser.add_option("-t", "--truncate",
                      action="store", type="string", dest="truncate",
                      help="Truncate the specified node rev.",
                      default=None)
    parser.add_option("", "--traceback",
                      action="store_true", dest="showTraceback",
                      help="Show error tracebacks (mainly used for debugging).",
                      default=False)

    (options, args) = parser.parse_args()

    if len(args) != 1:
        sys.stderr.write("Please specify exactly one rev file.\n")
        parser.print_help()
        sys.exit(1)

    checkOptions(options)

    filename = args[0]

    if options.dumpInstructions:
        options.dumpWindows = True
        LOG_MASK |= LOG_INSTRUCTIONS

    if options.dumpWindows:
        LOG_MASK |= LOG_WINDOWS

    # The repair modes write to the rev file; everything else only reads it.
    if options.truncate or options.fixRlle:
        revFile = open(filename, 'r+b')
    else:
        revFile = open(filename, 'rb')

    (root, changed) = getRootAndChangedPaths(revFile)

    if options.dumpChanged:
        revFile.seek(changed)
        changedPaths = ChangedPaths(revFile)

        dumpChangedPaths(changedPaths)
        sys.exit(0)

    # The revision number is taken from the leading digits of the file name.
    # Report a bad name directly instead of raising an undefined exception
    # class.
    match = re.match('([0-9]+)', os.path.basename(filename))
    if match is None:
        sys.stderr.write(
            "The file name must start with a decimal number that indicates the revision\n")
        sys.exit(1)
    currentRev = int(match.group(1), 10)

    if options.noderevRegexp:
        strategy = RegexpStrategy(filename, root, currentRev)
    else:
        strategy = ClassicStrategy(filename, root, currentRev)

    # Make stderr the same as stdout.  This helps when trying to catch all of
    # the output from a run.
    sys.stderr = sys.stdout

    try:
        for noderev in strategy:
            try:
                if options.truncate:
                    # Check to see if this is the rev we need to truncate
                    if options.truncate == noderev.id:
                        truncate(noderev, revFile)

                else:
                    print(noderev)

                    if not options.noVerify:
                        if noderev.text:
                            noderev.text.verify(revFile,
                                                options.dumpInstructions,
                                                options.dumpWindows)

                        if noderev.props and \
                           noderev.props.rev == noderev.props.currentRev:
                            noderev.props.verify(revFile,
                                                 options.dumpInstructions,
                                                 options.dumpWindows)

                    print("")
            except:
                # Push out what has been printed so far before the error
                # propagates.
                sys.stdout.flush()
                raise

        if options.truncate:
            # truncate() exits the program on success, so getting here means
            # the requested node rev was never seen.
            sys.stderr.write("Node rev %s was not found; nothing was truncated.\n"
                             % options.truncate)
            sys.exit(1)
    except InvalidRepHeader as e:
        if not options.fixRlle:
            handleError(e, options.showTraceback)

        fixHeader(e, revFile)

    except PotentiallyFixableException as e:
        if not options.fixRlle:
            handleError(e, options.showTraceback)

        fixStream(e, revFile)