# Copyright (c) 2006, 2007 by John Szakmeister <john at szakmeister dot net>
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
24 class FsfsVerifyException(Exception):
class PotentiallyFixableException(FsfsVerifyException):
    '''Represents a class of problems that we may be able to fix.

    message is handed to FsfsVerifyException.__init__ unchanged.
    offset is kept on the instance; fixHeader() reads it back as
    e.offset and seeks the rev file to that position.
    '''

    def __init__(self, message, offset):
        FsfsVerifyException.__init__(self, message)
        # The repair code needs to know where the problem was detected.
        self.offset = offset
36 class InvalidInstruction(PotentiallyFixableException
):
40 class InvalidCompressedStream(PotentiallyFixableException
):
44 class InvalidRepHeader(PotentiallyFixableException
):
48 class InvalidWindow(PotentiallyFixableException
):
52 class InvalidSvndiffVersion(FsfsVerifyException
):
56 class InvalidSvndiffHeader(FsfsVerifyException
):
60 class DataCorrupt(FsfsVerifyException
):
64 class NoMoreData(FsfsVerifyException
):
72 LOG_MASK
= LOG_SVNDIFF
75 def log(type, indent
, format
, *args
):
77 indentStr
= ' ' * indent
79 str = '\n'.join([indentStr
+ x
for x
in str.split('\n')])
83 class ByteStream(object):
84 def __init__(self
, fileobj
):
88 return ord(self
._f
.read(1))
def advance(self, numBytes):
    '''Skip numBytes bytes in the wrapped file object.

    The seek is issued with whence=1, so numBytes is counted from the
    current position rather than from the start of the file.
    '''
    self._f.seek(numBytes, 1)
97 if hasattr(self
._f
, 'clone'):
98 newFileObj
= self
._f
.clone()
100 # We expect the file object to map to a real file
102 # Tried using dup(), but (at least on the mac), that ends up
103 # creating 2 handles to the same underlying os file object,
104 # instead of two independent file objects. So, we resort to
105 # an open call to create a new file object
106 newFileObj
= open(self
._f
.name
, 'rb')
107 newFileObj
.seek(self
._f
.tell())
108 return ByteStream(newFileObj
)
110 # The following let ByteStream behave as a file within the
111 # context of this script.
def read(self, *args, **kwargs):
    '''Forward read() to the wrapped file object and return its result.

    All positional and keyword arguments are passed through untouched.
    '''
    return self._f.read(*args, **kwargs)
def seek(self, *args, **kwargs):
    '''Forward seek() to the wrapped file object and return its result.

    All positional and keyword arguments are passed through untouched.
    '''
    return self._f.seek(*args, **kwargs)
120 class ZlibByteStream(ByteStream
):
121 def __init__(self
, fileobj
, length
):
124 # Store the number of bytes consumed thus far so we can compute an offset
125 self
._numBytesConsumed
= 0
127 self
._startingOffset
= self
._f
.tell()
129 import zlib
, binascii
130 self
._z
= zlib
.decompressobj(15)
132 self
._buffer
= self
._z
.decompress(self
._f
.read(length
))
133 self
._origBufferLength
= len(self
._buffer
)
137 raise NoMoreData
, "Unexpected end of data stream!"
139 byte
= self
._buffer
[0]
140 self
._buffer
= self
._buffer
[1:]
145 return self
._origBufferLength
- len(self
._buffer
)
147 def advance(self
, numBytes
):
152 if hasattr(self
._f
, 'clone'):
153 newFileObj
= self
._f
.clone()
155 newFileObj
= open(self
._f
.name
, 'rb')
156 newFileObj
.seek(self
._f
.tell())
157 return ByteStream(newFileObj
)
159 # The following let ByteStream behave as a file within the
160 # context of this script.
162 def read(self
, *args
, **kwargs
):
165 def seek(self
, *args
, **kwargs
):
169 def getVarint(byteStream
):
170 '''Grabs a variable sized int from a bitstream (meaning this function
175 byte
= byteStream
.readByte()
176 i
= (i
<< 7) + (byte
& 0x7F)
182 INSTR_COPY_SOURCE
= 'copy-source'
183 INSTR_COPY_TARGET
= 'copy-target'
184 INSTR_COPY_DATA
= 'copy-data'
187 class SvndiffInstruction(object):
188 def __init__(self
, byteStream
):
189 self
.instrOffset
= byteStream
.tell()
191 byte
= byteStream
.readByte()
193 instruction
= (byte
>> 6) & 3
197 raise InvalidInstruction(
198 "Invalid instruction found at offset %d (%02X)" % (self
.instrOffset
,
203 self
.type = INSTR_COPY_SOURCE
204 elif instruction
== 1:
205 self
.type = INSTR_COPY_TARGET
207 self
.type = INSTR_COPY_DATA
210 # Length is coded as a varint following the current byte
211 length
= getVarint(byteStream
)
216 if (self
.type == INSTR_COPY_SOURCE
) or (self
.type == INSTR_COPY_TARGET
):
217 self
.offset
= getVarint(byteStream
)
219 if self
.type == INSTR_COPY_SOURCE
:
220 self
.sourceOffset
= self
.offset
222 self
.sourceOffset
= 0
224 if self
.type == INSTR_COPY_TARGET
:
225 self
.targetOffset
= self
.offset
227 self
.targetOffset
= 0
229 # Determine the number of bytes consumed in the source stream, target
230 # stream, and the data stream
232 if self
.type == INSTR_COPY_SOURCE
:
233 self
.sourceLength
= self
.length
235 self
.sourceLength
= 0
237 if self
.type == INSTR_COPY_TARGET
:
238 self
.targetLength
= self
.length
240 self
.targetLength
= 0
242 if self
.type == INSTR_COPY_DATA
:
243 self
.dataLength
= self
.length
247 self
.instrLength
= byteStream
.tell() - self
.instrOffset
250 return '<SvndiffInstruction %s so:%d sl:%d to: %d tl:%d dl:%d (%d, %d)>' % (
251 self
.type, self
.sourceOffset
, self
.sourceLength
, self
.targetOffset
,
252 self
.targetLength
, self
.dataLength
, self
.instrOffset
, self
.instrLength
)
255 class Window(object):
256 def __init__(self
, byteStream
, svndiffVersion
):
257 if svndiffVersion
not in [0, 1]:
258 raise InvalidSvndiffVersion
, \
259 "Invalid svndiff version %d" % svndiffVersion
261 # Record the initial offset of the window
262 self
.windowOffset
= byteStream
.tell()
265 self
.sourceOffset
= getVarint(byteStream
)
266 self
.sourceLength
= getVarint(byteStream
)
267 self
.targetLength
= getVarint(byteStream
)
268 self
.instrLength
= getVarint(byteStream
)
269 self
.dataLength
= getVarint(byteStream
)
270 self
.windowHeaderLength
= byteStream
.tell() - self
.windowOffset
271 self
.windowLength
= \
272 self
.windowHeaderLength
+ self
.instrLength
+ self
.dataLength
274 # Store the byte stream, and clone it for use as a data stream.
275 self
.instrByteStream
= byteStream
276 self
.dataByteStream
= byteStream
.clone()
278 # Advance the data stream past the instructions to the start of the data.
279 self
.dataByteStream
.advance(self
.instrLength
)
282 "The window header at offset %d appears to be corrupted" % \
285 e
.windowOffset
= self
.windowOffset
289 # In svndiff1, the instruction area starts with a varint-encoded length.
290 # If this length matches the one encoded in the header, then there is no
291 # compression. If it differs, then the stream is compressed with zlib.
293 self
.origInstrStream
= self
.instrByteStream
294 self
.origDataStream
= self
.dataByteStream
295 self
.isInstrCompressed
= False
296 self
.isDataCompressed
= False
297 self
.compressedInstrLength
= self
.instrLength
298 self
.compressedDataLength
= self
.dataLength
300 if svndiffVersion
== 1:
302 offset
= self
.instrByteStream
.tell()
303 encodedInstrLength
= getVarint(self
.instrByteStream
)
304 instrIntSize
= self
.instrByteStream
.tell() - offset
306 offset
= self
.dataByteStream
.tell()
307 encodedDataLength
= getVarint(self
.dataByteStream
)
308 dataIntSize
= self
.dataByteStream
.tell() - offset
310 self
.instrLength
= encodedInstrLength
311 self
.dataLength
= encodedDataLength
314 "The window header at offset %d appears to be corrupted" % \
317 e
.windowOffset
= self
.windowOffset
320 # Now, we need to make a determination about whether the data and
321 # instructions are compressed. If they are, we need to zlib decompress
322 # them. We do that by creating another stream and that will decompress
323 # the data on the fly.
325 offset
= self
.instrByteStream
.tell()
326 if self
.compressedInstrLength
- instrIntSize
!= self
.instrLength
:
327 self
.origInstrStream
= self
.instrByteStream
328 self
.instrByteStream
= ZlibByteStream(self
.origInstrStream
,
329 self
.compressedInstrLength
)
330 self
.isInstrCompressed
= True
332 new_e
= InvalidCompressedStream(
333 "Invalid compressed instr stream at offset %d (%s)" % (offset
,
336 new_e
.windowOffset
= self
.windowOffset
340 offset
= self
.dataByteStream
.tell()
341 if self
.compressedDataLength
- dataIntSize
!= self
.dataLength
:
342 self
.origDataStream
= self
.dataByteStream
343 self
.dataByteStream
= ZlibByteStream(self
.origDataStream
,
344 self
.compressedDataLength
)
345 self
.isDataCompressed
= True
347 new_e
= InvalidCompressedStream(
348 "Invalid compressed data stream at offset %d (%s)" % (offset
,
351 new_e
.windowOffset
= self
.windowOffset
355 expectedInstrLength
= self
.instrLength
356 expectedDataLength
= self
.dataLength
357 expectedTargetLength
= self
.targetLength
358 expectedSourceLength
= self
.sourceLength
360 computedInstrLength
= 0
361 computedDataLength
= 0
362 computedTargetLength
= 0
363 computedSourceLength
= 0
365 if expectedInstrLength
== 0:
367 "Corrupt window (at offset %d) has 0 instructions?!" % self
.windowOffset
,
369 e
.windowOffset
= self
.windowOffset
372 while computedInstrLength
< expectedInstrLength
:
374 instr
= SvndiffInstruction(self
.instrByteStream
)
375 except PotentiallyFixableException
, e
:
377 e
.windowOffset
= self
.windowOffset
380 log(LOG_INSTRUCTIONS
, 4, repr(instr
))
382 computedInstrLength
+= instr
.instrLength
383 computedDataLength
+= instr
.dataLength
384 computedSourceLength
+= instr
.sourceLength
385 computedTargetLength
+= \
386 instr
.targetLength
+ instr
.sourceLength
+ instr
.dataLength
388 if computedInstrLength
!= expectedInstrLength
:
390 "The number of instruction bytes consumed (%d) doesn't match the expected number (%d)" % \
391 (computedInstrLength
, expectedInstrLength
),
393 e
.windowOffset
= self
.windowOffset
396 if computedDataLength
!= expectedDataLength
:
398 "The number of data bytes consumed (%d) doesn't match the expected number (%d)" % \
399 (computedDataLength
, expectedDataLength
),
401 e
.windowOffset
= self
.windowOffset
404 if computedTargetLength
!= expectedTargetLength
:
406 "The number of target bytes consumed (%d) doesn't match the expected number (%d)" % \
407 (computedTargetLength
, expectedTargetLength
),
409 e
.windowOffset
= self
.windowOffset
412 # It appears that the source length specified in the window, isn't exactly
413 # equal to what gets consumed. I suspect that's because the algorithm is using different
414 # offsets within the window, and one offset/length pair will reach the end of the window.
415 # However, this hasn't shown to be a clear indicator of corruption. So for now, I'm
418 #if computedSourceLength != expectedSourceLength:
420 # "The number of source bytes consumed (%d) doesn't match the expected number (%d)" % \
421 # (computedSourceLength, expectedSourceLength),
423 # e.windowOffset = self.windowOffset
426 # Advance past the data. We do this using seek because we might have
427 # read a few bytes from the stream if it potentially had compressed data
428 self
.origInstrStream
.seek(self
.windowOffset
+ self
.windowLength
)
431 if hasattr(self
, 'compressedInstrLength'):
432 str = 'cil: %d cdl: %d ' % (self
.compressedInstrLength
,
433 self
.compressedDataLength
)
437 return "<Window wo:%d so:%d sl:%d tl:%d %sil:%d dl:%d whl:%d wl:%d>" % (
438 self
.windowOffset
, self
.sourceOffset
, self
.sourceLength
,
439 self
.targetLength
, str, self
.instrLength
, self
.dataLength
,
440 self
.windowHeaderLength
, self
.windowLength
)
443 class Svndiff(object):
444 def __init__(self
, fileobj
, length
):
446 self
.startingOffset
= self
._f
.tell()
448 header
= self
._f
.read(4)
451 "Unexpected end of file while svndiff header at offset %d)" % \
454 if header
[0:3] != 'SVN':
455 raise InvalidSvndiffHeader
, "Invalid svndiff header at offset %d" % \
456 (self
.startingOffset
)
458 self
.version
= ord(header
[3])
459 if self
.version
not in [0, 1]:
460 raise InvalidSvndiffVersion
, "Invalid svndiff version %d" % self
.version
462 self
._length
= length
- 4
465 self
._f
.seek(self
.startingOffset
+4)
467 bs
= ByteStream(self
._f
)
469 log(LOG_SVNDIFF
, 2, "<Svndiff so: %d ver: %d>", self
.startingOffset
,
473 remaining
= self
._length
475 w
= Window(bs
, self
.version
)
476 log(LOG_WINDOWS
, 3, repr(w
))
478 remaining
-= w
.windowLength
479 except PotentiallyFixableException
, e
:
480 e
.svndiffStart
= self
.startingOffset
487 raise ValueError, "Expected a PLAIN representation (%d)" % f
.tell()
492 field
= f
.readline()[:-1]
495 assert(field
[0] == 'K')
496 length
= int(field
.split(' ')[1])
497 field
= f
.readline()[:length
]
499 value
= f
.readline()[:-1]
500 assert(value
[0] == 'V')
501 length
= int(value
.split(' ')[1])
502 value
= f
.readline()[:length
]
504 (type, txn
) = value
.split(' ')
505 hash[field
] = [NodeType(type), NodeId(txn
)]
512 def __init__(self
, type, rev
, offset
, length
, size
, digest
,
513 contentType
, currentRev
, noderev
):
516 self
.offset
= int(offset
)
517 self
.length
= int(length
)
518 self
.size
= int(size
)
520 self
.currentRev
= currentRev
522 self
.contentType
= contentType
523 self
.noderev
= noderev
526 if not self
.contentType
:
527 contentType
= 'UNKNOWN'
529 if self
.contentType
not in ['PLAIN', 'DELTA', None]:
530 contentType
= 'INVALID'
532 contentType
= self
.contentType
533 return '%s: %s %d %d %d %d %s' % (self
.type, contentType
, self
.rev
,
534 self
.offset
, self
.length
, self
.size
,
537 def verify(self
, f
, dumpInstructions
, dumpWindows
):
538 if self
.contentType
not in ['PLAIN', 'DELTA', None]:
539 e
= InvalidRepHeader("Invalid rep header found at %d (%s)!" % \
540 (self
.offset
, self
.contentType
),
543 e
.noderev
= self
.noderev
546 if self
.rev
!= currentRev
:
547 print >>sys
.stderr
, "Skipping text rep since it isn't present in the current rev"
552 if header
!= self
.contentType
:
553 raise FsfsVerifyException
, \
554 "Invalid rep header found at %d (%s, %s)!" % (self
.offset
, header
,
557 if header
== 'DELTA':
558 # Consume the rest of the DELTA header
559 while f
.read(1) != '\n':
562 # This should be the start of the svndiff stream
563 actual_start
= f
.tell()
565 svndiff
= Svndiff(f
, self
.length
)
570 e
.noderev
= self
.noderev
574 assert(digest
== self
.digest
)
576 if f
.read(1) != '\n':
577 raise DataCorrupt
, "Expected a '\\n' after PLAIN"
581 m
.update(f
.read(self
.length
))
583 if self
.digest
and self
.digest
!= m
.hexdigest():
585 "PLAIN data is corrupted. Expected digest '%s', computed '%s'." % (
586 self
.digest
, m
.hexdigest())
588 if f
.read(7) != 'ENDREP\n':
589 raise DataCorrupt
, "Terminating ENDREP missing!"
def __init__(self, rev, offset, length, size, digest,
             contentType, currentRev, noderev):
    '''Initialise a text representation.

    Every argument is forwarded to the parent initialiser, with the
    literal 'text' inserted as the leading type argument.
    '''
    super(TextRep, self).__init__('text', rev, offset, length, size,
                                  digest, contentType, currentRev, noderev)
def __init__(self, rev, offset, length, size, digest,
             contentType, currentRev, noderev):
    '''Initialise a property representation.

    Every argument is forwarded to the parent initialiser, with the
    literal 'prop' inserted as the leading type argument.
    '''
    super(PropRep, self).__init__('prop', rev, offset, length, size,
                                  digest, contentType, currentRev, noderev)
606 class NodeId(object):
607 def __init__(self
, nodeid
):
608 (self
.txn_name
, offset
) = nodeid
.split('/')
609 self
.offset
= int(offset
)
610 self
.rev
= int(self
.txn_name
.split('.')[2][1:])
613 return self
.txn_name
+ '/%d' % self
.offset
615 def __eq__ (self
, other
):
616 s
= self
.txn_name
+ '/%d' % self
.offset
623 class NodeType(object):
624 def __init__(self
, t
):
625 if (t
!= 'file') and (t
!= 'dir'):
626 raise ValueError, 'Invalid Node type received: "%s"' % t
633 class NodeRev(object):
634 def __init__(self
, f
, currentRev
):
643 self
.nodeOffset
= f
.tell()
648 raise IOError, "Unexpected end of file"
652 # break apart the line
654 (field
, value
) = line
.split(':', 1)
657 print self
.nodeOffset
# pull off the leading space and trailing new line
665 self
.id = NodeId(value
)
666 elif field
== 'type':
667 self
.type = NodeType(value
)
668 elif field
== 'pred':
669 self
.pred
= NodeId(value
)
670 elif field
== 'text':
671 (rev
, offset
, length
, size
, digest
) = value
.split(' ')
677 if rev
!= currentRev
:
680 savedOffset
= f
.tell()
682 contentType
= f
.read(5)
685 self
.text
= TextRep(rev
, offset
, length
, size
, digest
,
686 contentType
, currentRev
, self
)
687 elif field
== 'props':
688 (rev
, offset
, length
, size
, digest
) = value
.split(' ')
694 if rev
!= currentRev
:
697 savedOffset
= f
.tell()
699 contentType
= f
.read(5)
702 self
.props
= PropRep(rev
, offset
, length
, size
, digest
,
703 contentType
, currentRev
, self
)
704 elif field
== 'cpath':
706 elif field
== 'copyroot':
707 self
.copyroot
= value
708 elif field
== 'copyfrom':
709 self
.copyfrom
= value
711 if self
.type.type == 'dir':
713 if self
.id.rev
== self
.text
.rev
:
715 f
.seek(self
.text
.offset
)
716 self
.dir = getDirHash(f
)
719 # The directory entries are stored in another file.
720 print "Warning: dir entries are stored in rev %d for noderev %s" % (
721 self
.text
.rev
, repr(self
.id))
724 str = 'NodeRev Id: %s\n type: %s\n' % (repr(self
.id), repr(self
.type))
726 str = str + ' pred: %s\n' % repr(self
.pred
)
728 str = str + ' %s\n' % repr(self
.text
)
730 str = str + ' %s\n' % repr(self
.props
)
732 str = str + ' cpath: %s\n' % self
.cpath
734 str = str + ' copyroot: %s\n' % self
.copyroot
736 str = str + ' copyfrom: %s\n' % self
.copyfrom
738 str = str + ' dir contents:\n'
740 str = str + ' %s: %s\n' % (k
, self
.dir[k
])
744 class ChangedPaths(object):
745 def __init__(self
, f
):
746 self
.changedPaths
= {}
749 currentOffset
= revFile
.tell()
750 action
= revFile
.readline()
751 if action
== '\n' or action
== '':
756 (id, action
, textMod
, propMod
) = action
[:-1].split(' ')[:4]
759 "Data appears to be corrupt at offset %d" % currentOffset
760 path
= path
[len(' '.join([id, action
, textMod
, propMod
]))+1:]
762 line
= revFile
.readline()
764 (copyfromRev
, copyfromPath
) = line
.split(' ')
769 self
.changedPaths
[path
] = (id, action
, textMod
, propMod
,
770 copyfromRev
, copyfromPath
)
774 return self
.changedPaths
.iteritems()
777 def getRootAndChangedPaths(revFile
):
780 revFile
.seek(offset
, 2)
783 offset
= revFile
.tell()
787 (rootNode
, changedPaths
) = map(int, revFile
.readline().split(' '))
789 return (rootNode
, changedPaths
)
792 def dumpChangedPaths(changedPaths
):
793 print "Changed Path Information:"
795 (id, action
, textMod
, propMod
,
796 copyfromRev
, copyfromPath
)) in changedPaths
:
798 print " action: %s" % action
799 print " text mod: %s" % textMod
800 print " prop mod: %s" % propMod
801 if copyfromRev
!= -1:
802 print "copyfrom path: %s" % copyfromPath
803 print "copyfrom rev: %s" % copyfromRev
class WalkStrategy(object):
    '''Base class for the objects the main script iterates over to
    obtain node revisions.

    Subclasses supply _nodeWalker(); iterating an instance rewinds the
    file to rootOffset and hands back whatever _nodeWalker() returns.
    '''

    def __init__(self, filename, rootOffset, currentRev):
        '''Open filename for binary reading and position it at rootOffset.

        rootOffset and currentRev are stored unchanged on the instance.
        '''
        self.f = open(filename, 'rb')
        self.rootOffset = rootOffset
        self.f.seek(rootOffset)
        self.currentRev = currentRev

    def _nodeWalker(self):
        '''Hook for subclasses; the base implementation always raises
        NotImplementedError.'''
        # Call form of raise: valid on both Python 2 and Python 3.
        raise NotImplementedError("_nodeWalker is not implemented")

    def __iter__(self):
        '''Seek back to rootOffset and return the result of _nodeWalker().'''
        self.f.seek(self.rootOffset)
        return self._nodeWalker()
822 class ClassicStrategy(WalkStrategy
):
823 def _nodeWalker (self
):
824 noderev
= NodeRev(self
.f
, self
.currentRev
)
827 if noderev
.type.type == 'dir':
828 for e
in noderev
.dir:
829 if noderev
.dir[e
][1].rev
== noderev
.id.rev
:
830 self
.f
.seek(noderev
.dir[e
][1].offset
)
831 for x
in self
._nodeWalker
():
835 class RegexpStrategy(WalkStrategy
):
def __init__(self, filename, rootOffset, currentRev):
    '''Set up the base strategy and open a second handle on filename.

    The arguments are passed straight to WalkStrategy.__init__.
    '''
    WalkStrategy.__init__(self, filename, rootOffset, currentRev)

    # File object passed to the NodeRev() constructor so that it
    # doesn't interfere with our regex search.
    self.nodeFile = open(filename, 'rb')
843 def _nodeWalker(self
):
844 nodeId_re
= re
.compile(r
'^id: [a-z0-9\./]+$')
850 match
= nodeId_re
.search(line
)
852 self
.nodeFile
.seek(offset
)
853 noderev
= NodeRev(self
.nodeFile
, self
.currentRev
)
856 offset
= offset
+ len(line
)
859 def verify(noderev
, revFile
, dumpInstructions
, dumpWindows
):
863 noderev
.text
.verify(revFile
,
867 if noderev
.props
and noderev
.props
.rev
== noderev
.props
.currentRev
:
868 noderev
.props
.verify(revFile
,
875 def truncate(noderev
, revFile
):
878 print "Truncating node %s (%s)" % (txnId
, noderev
.cpath
)
881 textRep
= noderev
.text
883 # Fix the text rep contents
884 offset
= textRep
.offset
885 revFile
.seek(offset
, 0)
886 revFile
.write("PLAIN\x0aENDREP\x0a")
889 offset
= noderev
.nodeOffset
890 revFile
.seek(offset
, 0)
892 savedOffset
= revFile
.tell()
893 s
= revFile
.readline()
895 revFile
.seek(savedOffset
, 0)
898 line
= revFile
.readline()
899 revFile
.seek(savedOffset
, 0)
900 fields
= line
.split(' ')
901 overallLength
= len(line
)
903 fields
[3] = '0' * len(fields
[3])
904 fields
[4] = '0' * len(fields
[4])
905 fields
[5] = 'd41d8cd98f00b204e9800998ecf8427e'
906 newTextRep
= ' '.join(fields
) + '\x0a'
907 assert(len(newTextRep
) == overallLength
)
908 revFile
.write(newTextRep
)
913 def fixHeader(e
, revFile
):
914 '''Attempt to fix the rep header. e is expected to be of type
915 InvalidRepHeader, since the exception stores the necessary information
916 to help repair the file.'''
918 # First, we need to locate the real start of the text rep
919 textrep_re
= re
.compile(r
'^(DELTA( \d+ \d+ \d+)?|PLAIN)$')
925 m
= textrep_re
.match(line
)
927 if offset
>= originalOffset
and offset
< e
.offset
:
928 originalOffset
= offset
929 headerLen
= len(line
)
930 offset
= offset
+ len(line
)
932 print "Original text rep located at", originalOffset
934 # Okay, now we have the original offset of the text rep that was
935 # in the process of being written out. The header portion of the
936 # text rep has a fsync() done after it, so the 4K blocks actually
937 # start after the header. We need to make sure to copy the header
938 # and the next 4K, to be on the safe side.
939 copyLen
= 4096 + headerLen
941 revFile
.seek(originalOffset
)
942 block
= revFile
.read(copyLen
)
943 print "Copy %d bytes from offset %d" % (copyLen
, originalOffset
)
945 print "Write %d bytes at offset %d" % (copyLen
, e
.offset
)
946 revFile
.seek(e
.offset
)
950 print "Fixed? :-) Re-run fsfsverify without the -f option"
953 def fixStream(e
, revFile
):
954 startOffset
= e
.svndiffStart
955 errorOffset
= e
.windowOffset
957 repeatedBlockOffset
= errorOffset
- ((errorOffset
- startOffset
) % 4096)
959 # Now we need to move up the rest of the rep
961 # Determine the final offset by finding the end of the rep.
962 revFile
.seek(errorOffset
)
964 endrep_re
= re
.compile(".*ENDREP$")
968 m
= endrep_re
.match(l
)
973 raise "Couldn't find end of rep!"
975 finalOffset
= errorOffset
+ srcLength
976 srcOffset
= errorOffset
977 destOffset
= repeatedBlockOffset
979 print "Copy %d bytes from offset %d" % (srcLength
, srcOffset
)
980 print "Write %d bytes at offset %d" % (srcLength
, destOffset
)
982 while srcOffset
< finalOffset
:
984 if (finalOffset
- srcOffset
) < blen
:
985 blen
= finalOffset
- srcOffset
986 revFile
.seek(srcOffset
)
987 block
= revFile
.read(blen
)
988 revFile
.seek(destOffset
)
997 print "Fixed? :-) Re-run fsfsverify without the -f option"
1000 def checkOptions(options
):
1002 for k
,v
in options
.__dict
__.items():
1003 if v
and (k
in ['dumpChanged', 'truncate', 'fixRlle']):
1007 print >>sys
.stderr
, "Please use only one of -c, -f, and -t."
1010 if options
.dumpChanged
and (options
.dumpWindows
or options
.dumpInstructions
):
1011 print >>sys
.stderr
, \
1012 "-c is incompatible with -w and -i. Dropping -w and/or -i."
1014 if options
.noVerify
and (options
.dumpWindows
or options
.dumpInstructions
):
1015 print >>sys
.stderr
, \
1016 "--no-verify is incompatible with -w and -i. Dropping -w and/or -i."
1019 def handleError(error
, withTraceback
=False):
1023 traceback
.print_exc()
1025 print >>sys
.stderr
,"Error %s: %s" % (error
.__class
__.__name
__, str(e
))
1026 print >>sys
.stderr
,"Try running with -f to fix the revision"
1030 if __name__
== '__main__':
1031 from optparse
import OptionParser
1033 parser
= OptionParser("usage: %prog [-w | -i | -r | -n] REV-FILE")
1034 parser
.add_option("-c", "--changed-paths",
1035 action
="store_true", dest
="dumpChanged",
1036 help="Dump changed path information", default
=False)
1037 parser
.add_option("", "--no-verify",
1038 action
="store_true", dest
="noVerify",
1039 help="Don't parse svndiff streams.", default
=False)
1040 parser
.add_option("-i", "--instructions",
1041 action
="store_true", dest
="dumpInstructions",
1042 help="Dump instructions (implies -w)", default
=False)
1043 parser
.add_option("-w", "--windows",
1044 action
="store_true", dest
="dumpWindows",
1045 help="Dump windows", default
=False)
1046 parser
.add_option("-n", "--noderev-regexp",
1047 action
="store_true", dest
="noderevRegexp",
1048 help="Find all noderevs using a regexp", default
=False)
1049 parser
.add_option("-f", "--fix-read-length-line-error",
1050 action
="store_true", dest
="fixRlle",
1051 help="Attempt to fix the read length line error",
1053 parser
.add_option("-t", "--truncate",
1054 action
="store", type="string", dest
="truncate",
1055 help="Truncate the specified node rev.",
1057 parser
.add_option("", "--traceback",
1058 action
="store_true", dest
="showTraceback",
1059 help="Show error tracebacks (mainly used for debugging).",
1062 (options
, args
) = parser
.parse_args()
1065 print >>sys
.stderr
, "Please specify exactly one rev file."
1069 checkOptions(options
)
1073 if options
.dumpInstructions
:
1074 options
.dumpWindows
= True
1075 LOG_MASK |
= LOG_INSTRUCTIONS
1077 if options
.dumpWindows
:
1078 LOG_MASK |
= LOG_WINDOWS
1080 if options
.truncate
or options
.fixRlle
:
1081 revFile
= open(filename
, 'r+b')
1083 revFile
= open(filename
, 'rb')
1085 (root
, changed
) = getRootAndChangedPaths(revFile
)
1087 if options
.dumpChanged
:
1088 revFile
.seek(changed
)
1089 changedPaths
= ChangedPaths(revFile
)
1091 dumpChangedPaths(changedPaths
)
1096 match
= re
.match('([0-9]+)', os
.path
.basename(filename
))
1097 currentRev
= int(match
.group(1), 10)
1099 raise CmdlineError
, \
1100 "The file name must start with a decimal number that indicates the revision"
1102 if options
.noderevRegexp
:
1103 strategy
= RegexpStrategy(filename
, root
, currentRev
)
1105 strategy
= ClassicStrategy(filename
, root
, currentRev
)
1107 # Make stderr the same as stdout. This helps when trying to catch all of the
1108 # output from a run.
1109 sys
.stderr
= sys
.stdout
1112 for noderev
in strategy
:
1114 if options
.truncate
:
1115 # Check to see if this is the rev we need to truncate
1116 if options
.truncate
== noderev
.id:
1117 truncate(noderev
, revFile
)
1122 if not options
.noVerify
:
1124 noderev
.text
.verify(revFile
,
1125 options
.dumpInstructions
,
1126 options
.dumpWindows
)
1128 if noderev
.props
and noderev
.props
.rev
== noderev
.props
.currentRev
:
1129 noderev
.props
.verify(revFile
,
1130 options
.dumpInstructions
,
1131 options
.dumpWindows
)
1137 except InvalidRepHeader
, e
:
1138 if not options
.fixRlle
:
1139 handleError(e
, options
.showTraceback
)
1141 fixHeader(e
, revFile
)
1143 except PotentiallyFixableException
, e
:
1144 if not options
.fixRlle
:
1145 handleError(e
, options
.showTraceback
)
1147 fixStream(e
, revFile
)