2 # Copyright (c) 2006, 2007 by John Szakmeister <john at szakmeister dot net>
4 # This program is free software; you can redistribute it and/or modify
5 # it under the terms of the GNU General Public License as published by
6 # the Free Software Foundation; either version 2 of the License, or
7 # (at your option) any later version.
9 # This program is distributed in the hope that it will be useful,
10 # but WITHOUT ANY WARRANTY; without even the implied warranty of
11 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 # GNU General Public License for more details.
14 # You should have received a copy of the GNU General Public License
15 # along with this program; if not, write to the Free Software
16 # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
# A handy constant for referring to the NULL digest (one that
# matches every digest): 32 ASCII zeros, the same width as the
# hex digests it is compared against.
NULL_DIGEST = '00000000000000000000000000000000'
29 class FsfsVerifyException(Exception):
33 class PotentiallyFixableException(FsfsVerifyException
):
34 '''Represents a class of problems that we may be able to fix.'''
36 def __init__(self
, message
, offset
):
37 FsfsVerifyException
.__init
__(self
, message
)
41 class InvalidInstruction(PotentiallyFixableException
):
45 class InvalidCompressedStream(PotentiallyFixableException
):
49 class InvalidRepHeader(PotentiallyFixableException
):
53 class InvalidWindow(PotentiallyFixableException
):
57 class InvalidSvndiffVersion(FsfsVerifyException
):
61 class InvalidSvndiffHeader(FsfsVerifyException
):
65 class DataCorrupt(FsfsVerifyException
):
69 class NoMoreData(FsfsVerifyException
):
# Bitmask of the log categories that are currently enabled.  It starts
# with only the svndiff category; the command-line handling ORs further
# LOG_* flags into it when the matching dump options are given.
LOG_MASK = LOG_SVNDIFF
80 def log(type, indent
, format
, *args
):
82 indentStr
= ' ' * indent
84 str = '\n'.join([indentStr
+ x
for x
in str.split('\n')])
88 class ByteStream(object):
89 def __init__(self
, fileobj
):
93 return ord(self
._f
.read(1))
def advance(self, numBytes):
    '''Move the stream position by numBytes bytes.

    The wrapped file object is seeked relative to its current position
    (whence=1); nothing is read and nothing is returned.
    '''
    self._f.seek(numBytes, 1)
102 if hasattr(self
._f
, 'clone'):
103 newFileObj
= self
._f
.clone()
105 # We expect the file object to map to a real file
107 # Tried using dup(), but (at least on the mac), that ends up
108 # creating 2 handles to the same underlying os file object,
109 # instead of two independent file objects. So, we resort to
110 # an open call to create a new file object
111 newFileObj
= open(self
._f
.name
, 'rb')
112 newFileObj
.seek(self
._f
.tell())
113 return ByteStream(newFileObj
)
115 # The following let ByteStream behave as a file within the
116 # context of this script.
def read(self, *args, **kwargs):
    '''Read from the wrapped file object.

    Every positional and keyword argument is forwarded unchanged to the
    underlying file object's read(), and whatever it returns is returned
    to the caller.
    '''
    return self._f.read(*args, **kwargs)
def seek(self, *args, **kwargs):
    '''Reposition the wrapped file object.

    Every positional and keyword argument is forwarded unchanged to the
    underlying file object's seek(), and its return value is passed
    back to the caller.
    '''
    return self._f.seek(*args, **kwargs)
125 class ZlibByteStream(ByteStream
):
126 def __init__(self
, fileobj
, length
):
129 # Store the number of bytes consumed thus far so we can compute an offset
130 self
._numBytesConsumed
= 0
132 self
._startingOffset
= self
._f
.tell()
134 import zlib
, binascii
135 self
._z
= zlib
.decompressobj(15)
137 self
._buffer
= self
._z
.decompress(self
._f
.read(length
))
138 self
._origBufferLength
= len(self
._buffer
)
142 raise NoMoreData
, "Unexpected end of data stream!"
144 byte
= self
._buffer
[0]
145 self
._buffer
= self
._buffer
[1:]
150 return self
._origBufferLength
- len(self
._buffer
)
152 def advance(self
, numBytes
):
157 if hasattr(self
._f
, 'clone'):
158 newFileObj
= self
._f
.clone()
160 newFileObj
= open(self
._f
.name
, 'rb')
161 newFileObj
.seek(self
._f
.tell())
162 return ByteStream(newFileObj
)
164 # The following let ByteStream behave as a file within the
165 # context of this script.
167 def read(self
, *args
, **kwargs
):
170 def seek(self
, *args
, **kwargs
):
174 def getVarint(byteStream
):
175 '''Grabs a variable sized int from a bitstream (meaning this function
180 byte
= byteStream
.readByte()
181 i
= (i
<< 7) + (byte
& 0x7F)
# Symbolic names for the three svndiff instruction kinds.  An
# SvndiffInstruction stores one of these in its `type` attribute.
INSTR_COPY_SOURCE = 'copy-source'
INSTR_COPY_TARGET = 'copy-target'
INSTR_COPY_DATA = 'copy-data'
192 class SvndiffInstruction(object):
193 def __init__(self
, byteStream
):
194 self
.instrOffset
= byteStream
.tell()
196 byte
= byteStream
.readByte()
198 instruction
= (byte
>> 6) & 3
202 raise InvalidInstruction(
203 "Invalid instruction found at offset %d (%02X)" % (self
.instrOffset
,
208 self
.type = INSTR_COPY_SOURCE
209 elif instruction
== 1:
210 self
.type = INSTR_COPY_TARGET
212 self
.type = INSTR_COPY_DATA
215 # Length is coded as a varint following the current byte
216 length
= getVarint(byteStream
)
221 if (self
.type == INSTR_COPY_SOURCE
) or (self
.type == INSTR_COPY_TARGET
):
222 self
.offset
= getVarint(byteStream
)
224 if self
.type == INSTR_COPY_SOURCE
:
225 self
.sourceOffset
= self
.offset
227 self
.sourceOffset
= 0
229 if self
.type == INSTR_COPY_TARGET
:
230 self
.targetOffset
= self
.offset
232 self
.targetOffset
= 0
234 # Determine the number of bytes consumed in the source stream, target
235 # stream, and the data stream
237 if self
.type == INSTR_COPY_SOURCE
:
238 self
.sourceLength
= self
.length
240 self
.sourceLength
= 0
242 if self
.type == INSTR_COPY_TARGET
:
243 self
.targetLength
= self
.length
245 self
.targetLength
= 0
247 if self
.type == INSTR_COPY_DATA
:
248 self
.dataLength
= self
.length
252 self
.instrLength
= byteStream
.tell() - self
.instrOffset
255 return '<SvndiffInstruction %s so:%d sl:%d to: %d tl:%d dl:%d (%d, %d)>' % (
256 self
.type, self
.sourceOffset
, self
.sourceLength
, self
.targetOffset
,
257 self
.targetLength
, self
.dataLength
, self
.instrOffset
, self
.instrLength
)
260 class Window(object):
261 def __init__(self
, byteStream
, svndiffVersion
):
262 if svndiffVersion
not in [0, 1]:
263 raise InvalidSvndiffVersion
, \
264 "Invalid svndiff version %d" % svndiffVersion
266 # Record the initial offset of the window
267 self
.windowOffset
= byteStream
.tell()
270 self
.sourceOffset
= getVarint(byteStream
)
271 self
.sourceLength
= getVarint(byteStream
)
272 self
.targetLength
= getVarint(byteStream
)
273 self
.instrLength
= getVarint(byteStream
)
274 self
.dataLength
= getVarint(byteStream
)
275 self
.windowHeaderLength
= byteStream
.tell() - self
.windowOffset
276 self
.windowLength
= \
277 self
.windowHeaderLength
+ self
.instrLength
+ self
.dataLength
279 # Store the byte stream, and clone it for use as a data stream.
280 self
.instrByteStream
= byteStream
281 self
.dataByteStream
= byteStream
.clone()
283 # Advance the data stream past the instructions to the start of the data.
284 self
.dataByteStream
.advance(self
.instrLength
)
287 "The window header at offset %d appears to be corrupted" % \
290 e
.windowOffset
= self
.windowOffset
294 # In svndiff1, the instruction area starts with a varint-encoded length.
295 # If this length matches the one encoded in the header, then there is no
296 # compression. If it differs, then the stream is compressed with zlib.
298 self
.origInstrStream
= self
.instrByteStream
299 self
.origDataStream
= self
.dataByteStream
300 self
.isInstrCompressed
= False
301 self
.isDataCompressed
= False
302 self
.compressedInstrLength
= self
.instrLength
303 self
.compressedDataLength
= self
.dataLength
305 if svndiffVersion
== 1:
307 offset
= self
.instrByteStream
.tell()
308 encodedInstrLength
= getVarint(self
.instrByteStream
)
309 instrIntSize
= self
.instrByteStream
.tell() - offset
311 offset
= self
.dataByteStream
.tell()
312 encodedDataLength
= getVarint(self
.dataByteStream
)
313 dataIntSize
= self
.dataByteStream
.tell() - offset
315 self
.instrLength
= encodedInstrLength
316 self
.dataLength
= encodedDataLength
319 "The window header at offset %d appears to be corrupted" % \
322 e
.windowOffset
= self
.windowOffset
325 # Now, we need to make a determination about whether the data and
326 # instructions are compressed. If they are, we need to zlib decompress
327 # them. We do that by creating another stream and that will decompress
328 # the data on the fly.
330 offset
= self
.instrByteStream
.tell()
331 if self
.compressedInstrLength
- instrIntSize
!= self
.instrLength
:
332 self
.origInstrStream
= self
.instrByteStream
333 self
.instrByteStream
= ZlibByteStream(self
.origInstrStream
,
334 self
.compressedInstrLength
)
335 self
.isInstrCompressed
= True
337 new_e
= InvalidCompressedStream(
338 "Invalid compressed instr stream at offset %d (%s)" % (offset
,
341 new_e
.windowOffset
= self
.windowOffset
345 offset
= self
.dataByteStream
.tell()
346 if self
.compressedDataLength
- dataIntSize
!= self
.dataLength
:
347 self
.origDataStream
= self
.dataByteStream
348 self
.dataByteStream
= ZlibByteStream(self
.origDataStream
,
349 self
.compressedDataLength
)
350 self
.isDataCompressed
= True
352 new_e
= InvalidCompressedStream(
353 "Invalid compressed data stream at offset %d (%s)" % (offset
,
356 new_e
.windowOffset
= self
.windowOffset
360 expectedInstrLength
= self
.instrLength
361 expectedDataLength
= self
.dataLength
362 expectedTargetLength
= self
.targetLength
363 expectedSourceLength
= self
.sourceLength
365 computedInstrLength
= 0
366 computedDataLength
= 0
367 computedTargetLength
= 0
368 computedSourceLength
= 0
370 if expectedInstrLength
== 0:
372 "Corrupt window (at offset %d) has 0 instructions?!" % self
.windowOffset
,
374 e
.windowOffset
= self
.windowOffset
377 while computedInstrLength
< expectedInstrLength
:
379 instr
= SvndiffInstruction(self
.instrByteStream
)
380 except PotentiallyFixableException
, e
:
382 e
.windowOffset
= self
.windowOffset
385 log(LOG_INSTRUCTIONS
, 4, repr(instr
))
387 computedInstrLength
+= instr
.instrLength
388 computedDataLength
+= instr
.dataLength
389 computedSourceLength
+= instr
.sourceLength
390 computedTargetLength
+= \
391 instr
.targetLength
+ instr
.sourceLength
+ instr
.dataLength
393 if computedInstrLength
!= expectedInstrLength
:
395 "The number of instruction bytes consumed (%d) doesn't match the expected number (%d)" % \
396 (computedInstrLength
, expectedInstrLength
),
398 e
.windowOffset
= self
.windowOffset
401 if computedDataLength
!= expectedDataLength
:
403 "The number of data bytes consumed (%d) doesn't match the expected number (%d)" % \
404 (computedDataLength
, expectedDataLength
),
406 e
.windowOffset
= self
.windowOffset
409 if computedTargetLength
!= expectedTargetLength
:
411 "The number of target bytes consumed (%d) doesn't match the expected number (%d)" % \
412 (computedTargetLength
, expectedTargetLength
),
414 e
.windowOffset
= self
.windowOffset
417 # It appears that the source length specified in the window, isn't exactly
418 # equal to what gets consumed. I suspect that's because the algorithm is using different
419 # offsets within the window, and one offset/length pair will reach the end of the window.
420 # However, this hasn't shown to be a clear indicator of corruption. So for now, I'm
423 #if computedSourceLength != expectedSourceLength:
425 # "The number of source bytes consumed (%d) doesn't match the expected number (%d)" % \
426 # (computedSourceLength, expectedSourceLength),
428 # e.windowOffset = self.windowOffset
431 # Advance past the data. We do this using seek because we might have
432 # read a few bytes from the stream if it potentially had compressed data
433 self
.origInstrStream
.seek(self
.windowOffset
+ self
.windowLength
)
436 if hasattr(self
, 'compressedInstrLength'):
437 str = 'cil: %d cdl: %d ' % (self
.compressedInstrLength
,
438 self
.compressedDataLength
)
442 return "<Window wo:%d so:%d sl:%d tl:%d %sil:%d dl:%d whl:%d wl:%d>" % (
443 self
.windowOffset
, self
.sourceOffset
, self
.sourceLength
,
444 self
.targetLength
, str, self
.instrLength
, self
.dataLength
,
445 self
.windowHeaderLength
, self
.windowLength
)
448 class Svndiff(object):
449 def __init__(self
, fileobj
, length
):
451 self
.startingOffset
= self
._f
.tell()
453 header
= self
._f
.read(4)
456 "Unexpected end of file while svndiff header at offset %d)" % \
459 if header
[0:3] != 'SVN':
460 raise InvalidSvndiffHeader
, "Invalid svndiff header at offset %d" % \
461 (self
.startingOffset
)
463 self
.version
= ord(header
[3])
464 if self
.version
not in [0, 1]:
465 raise InvalidSvndiffVersion
, "Invalid svndiff version %d" % self
.version
467 self
._length
= length
- 4
470 self
._f
.seek(self
.startingOffset
+4)
472 bs
= ByteStream(self
._f
)
474 log(LOG_SVNDIFF
, 2, "<Svndiff so: %d ver: %d>", self
.startingOffset
,
478 remaining
= self
._length
480 w
= Window(bs
, self
.version
)
481 log(LOG_WINDOWS
, 3, repr(w
))
483 remaining
-= w
.windowLength
484 except PotentiallyFixableException
, e
:
485 e
.svndiffStart
= self
.startingOffset
492 raise ValueError, "Expected a PLAIN representation (%d)" % f
.tell()
497 field
= f
.readline()[:-1]
500 assert(field
[0] == 'K')
501 length
= int(field
.split(' ')[1])
502 field
= f
.readline()[:length
]
504 value
= f
.readline()[:-1]
505 assert(value
[0] == 'V')
506 length
= int(value
.split(' ')[1])
507 value
= f
.readline()[:length
]
509 (type, txn
) = value
.split(' ')
510 hash[field
] = [NodeType(type), NodeId(txn
)]
517 def __init__(self
, type, rev
, offset
, length
, size
, digest
,
518 contentType
, currentRev
, noderev
):
521 self
.offset
= int(offset
)
522 self
.length
= int(length
)
523 self
.size
= int(size
)
526 self
.currentRev
= currentRev
528 self
.contentType
= contentType
529 self
.noderev
= noderev
532 if not self
.contentType
:
533 contentType
= 'UNKNOWN'
535 if self
.contentType
not in ['PLAIN', 'DELTA', None]:
536 contentType
= 'INVALID'
538 contentType
= self
.contentType
539 return '%s: %s %d %d %d %d %s' % (self
.type, contentType
, self
.rev
,
540 self
.offset
, self
.length
, self
.size
,
543 def verify(self
, f
, dumpInstructions
, dumpWindows
):
544 if self
.contentType
not in ['PLAIN', 'DELTA', None]:
545 e
= InvalidRepHeader("Invalid rep header found at %d (%s)!" % \
546 (self
.offset
, self
.contentType
),
549 e
.noderev
= self
.noderev
552 if self
.rev
!= currentRev
:
553 print >>sys
.stderr
, "Skipping text rep since it isn't present in the current rev"
558 if header
!= self
.contentType
:
559 raise FsfsVerifyException
, \
560 "Invalid rep header found at %d (%s, %s)!" % (self
.offset
, header
,
563 if header
== 'DELTA':
564 # Consume the rest of the DELTA header
565 while f
.read(1) != '\n':
568 # This should be the start of the svndiff stream
569 actual_start
= f
.tell()
571 svndiff
= Svndiff(f
, self
.length
)
576 e
.noderev
= self
.noderev
579 if digest
and (self
.digest
!= NULL_DIGEST
):
580 assert(digest
== self
.digest
)
582 if f
.read(1) != '\n':
583 raise DataCorrupt
, "Expected a '\\n' after PLAIN"
587 m
.update(f
.read(self
.length
))
589 if self
.digest
and self
.digest
!= NULL_DIGEST \
590 and self
.digest
!= m
.hexdigest():
592 "PLAIN data is corrupted. Expected digest '%s', computed '%s'." % (
593 self
.digest
, m
.hexdigest())
595 if f
.read(7) != 'ENDREP\n':
596 raise DataCorrupt
, "Terminating ENDREP missing!"
def __init__(self, rev, offset, length, size, digest,
             contentType, currentRev, noderev):
    '''Create a text representation.

    All arguments are handed straight to the base-class constructor,
    with the representation type fixed to 'text'.
    '''
    super(TextRep, self).__init__('text', rev, offset, length, size,
                                  digest, contentType, currentRev, noderev)
def __init__(self, rev, offset, length, size, digest,
             contentType, currentRev, noderev):
    '''Create a property representation.

    All arguments are handed straight to the base-class constructor,
    with the representation type fixed to 'prop'.
    '''
    super(PropRep, self).__init__('prop', rev, offset, length, size,
                                  digest, contentType, currentRev, noderev)
613 class NodeId(object):
def __init__(self, nodeid):
    '''Parse a node id string of the form "<txn_name>/<offset>".

    Sets three attributes:
      txn_name -- the text before the '/'
      offset   -- the text after the '/', converted to int
      rev      -- the third '.'-separated field of txn_name with its
                  first character dropped, converted to int
                  (e.g. 'a.b.r12/345' gives rev 12)

    Raises ValueError if nodeid does not contain exactly one '/' or if
    either number cannot be parsed, and IndexError if txn_name has fewer
    than three '.'-separated fields.
    '''
    (self.txn_name, offset) = nodeid.split('/')
    self.offset = int(offset)
    self.rev = int(self.txn_name.split('.')[2][1:])
620 return self
.txn_name
+ '/%d' % self
.offset
622 def __eq__ (self
, other
):
623 s
= self
.txn_name
+ '/%d' % self
.offset
630 class NodeType(object):
631 def __init__(self
, t
):
632 if (t
!= 'file') and (t
!= 'dir'):
633 raise ValueError, 'Invalid Node type received: "%s"' % t
640 class NodeRev(object):
641 def __init__(self
, f
, currentRev
):
650 self
.nodeOffset
= f
.tell()
655 raise IOError, "Unexpected end of file"
659 # break apart the line
661 (field
, value
) = line
.split(':', 1)
664 print self
.nodeOffset
668 # pull off the leading space and trailing newline
672 self
.id = NodeId(value
)
673 elif field
== 'type':
674 self
.type = NodeType(value
)
675 elif field
== 'pred':
676 self
.pred
= NodeId(value
)
677 elif field
== 'text':
678 (rev
, offset
, length
, size
, digest
) = value
.split(' ')
684 if rev
!= currentRev
:
687 savedOffset
= f
.tell()
689 contentType
= f
.read(5)
692 self
.text
= TextRep(rev
, offset
, length
, size
, digest
,
693 contentType
, currentRev
, self
)
694 elif field
== 'props':
695 (rev
, offset
, length
, size
, digest
) = value
.split(' ')
701 if rev
!= currentRev
:
704 savedOffset
= f
.tell()
706 contentType
= f
.read(5)
709 self
.props
= PropRep(rev
, offset
, length
, size
, digest
,
710 contentType
, currentRev
, self
)
711 elif field
== 'cpath':
713 elif field
== 'copyroot':
714 self
.copyroot
= value
715 elif field
== 'copyfrom':
716 self
.copyfrom
= value
718 if self
.type.type == 'dir':
720 if self
.id.rev
== self
.text
.rev
:
722 f
.seek(self
.text
.offset
)
723 self
.dir = getDirHash(f
)
726 # The directory entries are stored in another file.
727 print "Warning: dir entries are stored in rev %d for noderev %s" % (
728 self
.text
.rev
, repr(self
.id))
731 str = 'NodeRev Id: %s\n type: %s\n' % (repr(self
.id), repr(self
.type))
733 str = str + ' pred: %s\n' % repr(self
.pred
)
735 str = str + ' %s\n' % repr(self
.text
)
737 str = str + ' %s\n' % repr(self
.props
)
739 str = str + ' cpath: %s\n' % self
.cpath
741 str = str + ' copyroot: %s\n' % self
.copyroot
743 str = str + ' copyfrom: %s\n' % self
.copyfrom
745 str = str + ' dir contents:\n'
747 str = str + ' %s: %s\n' % (k
, self
.dir[k
])
751 class ChangedPaths(object):
752 def __init__(self
, f
):
753 self
.changedPaths
= {}
756 currentOffset
= revFile
.tell()
757 action
= revFile
.readline()
758 if action
== '\n' or action
== '':
763 (id, action
, textMod
, propMod
) = action
[:-1].split(' ')[:4]
766 "Data appears to be corrupt at offset %d" % currentOffset
767 path
= path
[len(' '.join([id, action
, textMod
, propMod
]))+1:]
769 line
= revFile
.readline()
771 (copyfromRev
, copyfromPath
) = line
[:-1].split(' ', 1)
776 self
.changedPaths
[path
] = (id, action
, textMod
, propMod
,
777 copyfromRev
, copyfromPath
)
781 return self
.changedPaths
.iteritems()
784 def getRootAndChangedPaths(revFile
):
787 revFile
.seek(offset
, 2)
790 offset
= revFile
.tell()
794 (rootNode
, changedPaths
) = map(int, revFile
.readline().split(' '))
796 return (rootNode
, changedPaths
)
799 def dumpChangedPaths(changedPaths
):
800 print "Changed Path Information:"
802 (id, action
, textMod
, propMod
,
803 copyfromRev
, copyfromPath
)) in changedPaths
:
806 print " action: %s" % action
807 print " text mod: %s" % textMod
808 print " prop mod: %s" % propMod
809 if copyfromRev
!= -1:
810 print " copyfrom path: %s" % copyfromPath
811 print " copyfrom rev: %s" % copyfromRev
815 class WalkStrategy(object):
def __init__(self, filename, rootOffset, currentRev):
    '''Open the revision file and position it at the root node.

    filename   -- path of the file to open (binary, read-only)
    rootOffset -- byte offset seeked to immediately after opening;
                  also remembered as self.rootOffset
    currentRev -- stored unchanged as self.currentRev

    The open file object is kept as self.f.
    '''
    self.f = open(filename, 'rb')
    self.rootOffset = rootOffset
    self.f.seek(rootOffset)
    self.currentRev = currentRev
def _nodeWalker(self):
    '''Placeholder for the node traversal; subclasses must override it.

    Always raises NotImplementedError.
    '''
    # Call-style raise is accepted by both Python 2 and Python 3, unlike
    # the older "raise Class, message" statement form.
    raise NotImplementedError("_nodeWalker is not implemented")
826 self
.f
.seek(self
.rootOffset
)
827 return self
._nodeWalker
()
830 class ClassicStrategy(WalkStrategy
):
831 def _nodeWalker (self
):
832 noderev
= NodeRev(self
.f
, self
.currentRev
)
835 if noderev
.type.type == 'dir':
836 for e
in noderev
.dir:
837 if noderev
.dir[e
][1].rev
== noderev
.id.rev
:
838 self
.f
.seek(noderev
.dir[e
][1].offset
)
839 for x
in self
._nodeWalker
():
843 class RegexpStrategy(WalkStrategy
):
def __init__(self, filename, rootOffset, currentRev):
    '''Set up the base strategy and open a second handle on the file.

    The arguments are forwarded unchanged to WalkStrategy.__init__.
    '''
    WalkStrategy.__init__(self, filename, rootOffset, currentRev)

    # File object passed to the NodeRev() constructor so that it
    # doesn't interfere with our regex search.
    self.nodeFile = open(filename, 'rb')
851 def _nodeWalker(self
):
852 nodeId_re
= re
.compile(r
'^id: [a-z0-9\./]+$')
858 match
= nodeId_re
.search(line
)
860 self
.nodeFile
.seek(offset
)
861 noderev
= NodeRev(self
.nodeFile
, self
.currentRev
)
864 offset
= offset
+ len(line
)
867 def verify(noderev
, revFile
, dumpInstructions
, dumpWindows
):
871 noderev
.text
.verify(revFile
,
875 if noderev
.props
and noderev
.props
.rev
== noderev
.props
.currentRev
:
876 noderev
.props
.verify(revFile
,
883 def truncate(noderev
, revFile
):
886 print "Truncating node %s (%s)" % (txnId
, noderev
.cpath
)
889 textRep
= noderev
.text
891 # Fix the text rep contents
892 offset
= textRep
.offset
893 revFile
.seek(offset
, 0)
894 revFile
.write("PLAIN\x0aENDREP\x0a")
897 offset
= noderev
.nodeOffset
898 revFile
.seek(offset
, 0)
900 savedOffset
= revFile
.tell()
901 s
= revFile
.readline()
903 revFile
.seek(savedOffset
, 0)
906 line
= revFile
.readline()
907 revFile
.seek(savedOffset
, 0)
908 fields
= line
.split(' ')
909 overallLength
= len(line
)
911 fields
[3] = '0' * len(fields
[3])
912 fields
[4] = '0' * len(fields
[4])
913 fields
[5] = 'd41d8cd98f00b204e9800998ecf8427e'
914 newTextRep
= ' '.join(fields
) + '\x0a'
915 assert(len(newTextRep
) == overallLength
)
916 revFile
.write(newTextRep
)
921 def fixHeader(e
, revFile
):
922 '''Attempt to fix the rep header. e is expected to be of type
923 InvalidRepHeader, since the exception stores the necessary information
924 to help repair the file.'''
926 # First, we need to locate the real start of the text rep
927 textrep_re
= re
.compile(r
'^(DELTA( \d+ \d+ \d+)?|PLAIN)$')
933 m
= textrep_re
.match(line
)
935 if offset
>= originalOffset
and offset
< e
.offset
:
936 originalOffset
= offset
937 headerLen
= len(line
)
938 offset
= offset
+ len(line
)
940 print "Original text rep located at", originalOffset
942 # Okay, now we have the original offset of the text rep that was
943 # in the process of being written out. The header portion of the
944 # text rep has a fsync() done after it, so the 4K blocks actually
945 # start after the header. We need to make sure to copy the header
946 # and the next 4K, to be on the safe side.
947 copyLen
= 4096 + headerLen
949 revFile
.seek(originalOffset
)
950 block
= revFile
.read(copyLen
)
951 print "Copy %d bytes from offset %d" % (copyLen
, originalOffset
)
953 print "Write %d bytes at offset %d" % (copyLen
, e
.offset
)
954 revFile
.seek(e
.offset
)
958 print "Fixed? :-) Re-run fsfsverify without the -f option"
961 def fixStream(e
, revFile
):
962 startOffset
= e
.svndiffStart
963 errorOffset
= e
.windowOffset
965 repeatedBlockOffset
= errorOffset
- ((errorOffset
- startOffset
) % 4096)
967 # Now we need to move up the rest of the rep
969 # Determine the final offset by finding the end of the rep.
970 revFile
.seek(errorOffset
)
972 endrep_re
= re
.compile(".*ENDREP$")
976 m
= endrep_re
.match(l
)
981 raise "Couldn't find end of rep!"
983 finalOffset
= errorOffset
+ srcLength
984 srcOffset
= errorOffset
985 destOffset
= repeatedBlockOffset
987 print "Copy %d bytes from offset %d" % (srcLength
, srcOffset
)
988 print "Write %d bytes at offset %d" % (srcLength
, destOffset
)
990 while srcOffset
< finalOffset
:
992 if (finalOffset
- srcOffset
) < blen
:
993 blen
= finalOffset
- srcOffset
994 revFile
.seek(srcOffset
)
995 block
= revFile
.read(blen
)
996 revFile
.seek(destOffset
)
1005 print "Fixed? :-) Re-run fsfsverify without the -f option"
1008 def checkOptions(options
):
1010 for k
,v
in options
.__dict
__.items():
1011 if v
and (k
in ['dumpChanged', 'truncate', 'fixRlle']):
1015 print >>sys
.stderr
, "Please use only one of -c, -f, and -t."
1018 if options
.dumpChanged
and (options
.dumpWindows
or options
.dumpInstructions
):
1019 print >>sys
.stderr
, \
1020 "-c is incompatible with -w and -i. Dropping -w and/or -i."
1022 if options
.noVerify
and (options
.dumpWindows
or options
.dumpInstructions
):
1023 print >>sys
.stderr
, \
1024 "--no-verify is incompatible with -w and -i. Dropping -w and/or -i."
1027 def handleError(error
, withTraceback
=False):
1031 traceback
.print_exc()
1033 print >>sys
.stderr
,"Error %s: %s" % (error
.__class
__.__name
__, str(e
))
1034 print >>sys
.stderr
,"Try running with -f to fix the revision"
1038 if __name__
== '__main__':
1039 from optparse
import OptionParser
1041 parser
= OptionParser("usage: %prog [-w | -i | -r | -n] REV-FILE")
1042 parser
.add_option("-c", "--changed-paths",
1043 action
="store_true", dest
="dumpChanged",
1044 help="Dump changed path information", default
=False)
1045 parser
.add_option("", "--no-verify",
1046 action
="store_true", dest
="noVerify",
1047 help="Don't parse svndiff streams.", default
=False)
1048 parser
.add_option("-i", "--instructions",
1049 action
="store_true", dest
="dumpInstructions",
1050 help="Dump instructions (implies -w)", default
=False)
1051 parser
.add_option("-w", "--windows",
1052 action
="store_true", dest
="dumpWindows",
1053 help="Dump windows", default
=False)
1054 parser
.add_option("-n", "--noderev-regexp",
1055 action
="store_true", dest
="noderevRegexp",
1056 help="Find all noderevs using a regexp", default
=False)
1057 parser
.add_option("-f", "--fix-read-length-line-error",
1058 action
="store_true", dest
="fixRlle",
1059 help="Attempt to fix the read length line error",
1061 parser
.add_option("-t", "--truncate",
1062 action
="store", type="string", dest
="truncate",
1063 help="Truncate the specified node rev.",
1065 parser
.add_option("", "--traceback",
1066 action
="store_true", dest
="showTraceback",
1067 help="Show error tracebacks (mainly used for debugging).",
1070 (options
, args
) = parser
.parse_args()
1073 print >>sys
.stderr
, "Please specify exactly one rev file."
1077 checkOptions(options
)
1081 if options
.dumpInstructions
:
1082 options
.dumpWindows
= True
1083 LOG_MASK |
= LOG_INSTRUCTIONS
1085 if options
.dumpWindows
:
1086 LOG_MASK |
= LOG_WINDOWS
1088 if options
.truncate
or options
.fixRlle
:
1089 revFile
= open(filename
, 'r+b')
1091 revFile
= open(filename
, 'rb')
1093 (root
, changed
) = getRootAndChangedPaths(revFile
)
1095 if options
.dumpChanged
:
1096 revFile
.seek(changed
)
1097 changedPaths
= ChangedPaths(revFile
)
1099 dumpChangedPaths(changedPaths
)
1104 match
= re
.match('([0-9]+)', os
.path
.basename(filename
))
1105 currentRev
= int(match
.group(1), 10)
1107 raise CmdlineError
, \
1108 "The file name must start with a decimal number that indicates the revision"
1110 if options
.noderevRegexp
:
1111 strategy
= RegexpStrategy(filename
, root
, currentRev
)
1113 strategy
= ClassicStrategy(filename
, root
, currentRev
)
1115 # Make stderr the same as stdout. This helps when trying to catch all of the
1116 # output from a run.
1117 sys
.stderr
= sys
.stdout
1120 for noderev
in strategy
:
1122 if options
.truncate
:
1123 # Check to see if this is the rev we need to truncate
1124 if options
.truncate
== noderev
.id:
1125 truncate(noderev
, revFile
)
1130 if not options
.noVerify
:
1132 noderev
.text
.verify(revFile
,
1133 options
.dumpInstructions
,
1134 options
.dumpWindows
)
1136 if noderev
.props
and noderev
.props
.rev
== noderev
.props
.currentRev
:
1137 noderev
.props
.verify(revFile
,
1138 options
.dumpInstructions
,
1139 options
.dumpWindows
)
1145 except InvalidRepHeader
, e
:
1146 if not options
.fixRlle
:
1147 handleError(e
, options
.showTraceback
)
1149 fixHeader(e
, revFile
)
1151 except PotentiallyFixableException
, e
:
1152 if not options
.fixRlle
:
1153 handleError(e
, options
.showTraceback
)
1155 fixStream(e
, revFile
)