Fix compiler warning due to missing function prototype.
[svn.git] / contrib / server-side / fsfsverify.py
blob2d0be8a2d586cdcce95880fcb7bae11ce9d4a24f
1 #!/usr/bin/env python
2 # Copyright (c) 2006, 2007 by John Szakmeister <john at szakmeister dot net>
4 # This program is free software; you can redistribute it and/or modify
5 # it under the terms of the GNU General Public License as published by
6 # the Free Software Foundation; either version 2 of the License, or
7 # (at your option) any later version.
9 # This program is distributed in the hope that it will be useful,
10 # but WITHOUT ANY WARRANTY; without even the implied warranty of
11 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 # GNU General Public License for more details.
14 # You should have received a copy of the GNU General Public License
15 # along with this program; if not, write to the Free Software
16 # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
18 import os
19 import optparse
20 import sys
21 import re
# A handy constant for referring to the NULL digest (one that
# matches every digest): 32 zero characters.
NULL_DIGEST = '0' * 32
class FsfsVerifyException(Exception):
  '''Root of the exception hierarchy used by this script.'''


class PotentiallyFixableException(FsfsVerifyException):
  '''Represents a class of problems that we may be able to fix.

  The file offset at which the problem was noticed is stored in the
  offset attribute.'''

  def __init__(self, message, offset):
    FsfsVerifyException.__init__(self, message)
    self.offset = offset


class InvalidInstruction(PotentiallyFixableException):
  '''An svndiff instruction could not be decoded.'''


class InvalidCompressedStream(PotentiallyFixableException):
  '''A zlib-compressed instruction or data area could not be set up.'''


class InvalidRepHeader(PotentiallyFixableException):
  '''A representation header was neither PLAIN nor DELTA.'''


class InvalidWindow(PotentiallyFixableException):
  '''An svndiff window header or its length bookkeeping is inconsistent.'''


class InvalidSvndiffVersion(FsfsVerifyException):
  '''The svndiff version is not one this script understands.'''


class InvalidSvndiffHeader(FsfsVerifyException):
  '''The svndiff stream does not start with the expected signature.'''


class DataCorrupt(FsfsVerifyException):
  '''Data in the revision file does not have the expected contents.'''


class NoMoreData(FsfsVerifyException):
  '''A stream ran out of bytes before a read could be satisfied.'''
# Bit flags naming the categories of diagnostic output that log() knows.
LOG_INSTRUCTIONS = 1 << 0
LOG_WINDOWS = 1 << 1
LOG_SVNDIFF = 1 << 2

# The categories that are currently enabled.
LOG_MASK = LOG_SVNDIFF
def log(type, indent, format, *args):
  '''Print a diagnostic message if its category is enabled in LOG_MASK.

  type   -- one of the LOG_* bit flags
  indent -- number of spaces prepended to every line of the message
  format -- a %-style format string, or a ready-made message when no
            args are supplied
  args   -- the values to interpolate into format
  '''
  if not (type & LOG_MASK):
    return

  # Only interpolate when there is something to interpolate.  Callers
  # hand us pre-formatted repr() output without args, and a stray '%'
  # in such a message must not be read as a conversion specifier.
  if args:
    message = format % args
  else:
    message = format

  prefix = ' ' * indent
  print('\n'.join([prefix + line for line in message.split('\n')]))
class ByteStream(object):
  '''Wraps a file object and adds byte-at-a-time access to it.'''

  def __init__(self, fileobj):
    self._f = fileobj

  def readByte(self):
    '''Return the next byte as an integer.

    Raises NoMoreData when the underlying file is exhausted.'''
    byte = self._f.read(1)
    if not byte:
      # ord() of an empty read would only produce an opaque TypeError.
      raise NoMoreData("Unexpected end of data stream!")
    return ord(byte)

  def tell(self):
    '''Return the current position of the underlying file.'''
    return self._f.tell()

  def advance(self, numBytes):
    '''Skip numBytes bytes relative to the current position.'''
    self._f.seek(numBytes, 1)

  def clone(self):
    '''Return an independent ByteStream positioned where this one is.'''
    if hasattr(self._f, 'clone'):
      newFileObj = self._f.clone()
    else:
      # We expect the file object to map to a real file

      # Tried using dup(), but (at least on the mac), that ends up
      # creating 2 handles to the same underlying os file object,
      # instead of two independent file objects.  So, we resort to
      # an open call to create a new file object
      newFileObj = open(self._f.name, 'rb')
    newFileObj.seek(self._f.tell())
    return ByteStream(newFileObj)

  # The following let ByteStream behave as a file within the
  # context of this script.

  def read(self, *args, **kwargs):
    return self._f.read(*args, **kwargs)

  def seek(self, *args, **kwargs):
    return self._f.seek(*args, **kwargs)
class ZlibByteStream(ByteStream):
  '''A ByteStream that serves bytes from a zlib-compressed area.

  The constructor reads length bytes from fileobj and decompresses them
  in one go; readByte(), tell() and advance() then operate on the
  decompressed buffer.  read() and seek() are not supported.'''

  def __init__(self, fileobj, length):
    import zlib

    self._f = fileobj

    # Store the number of bytes consumed thus far so we can compute an offset
    self._numBytesConsumed = 0

    self._startingOffset = self._f.tell()

    self._z = zlib.decompressobj(15)

    # A bytearray yields integers when indexed, so readByte() needs no
    # ord() call and no per-byte copy of the remaining data.
    self._buffer = bytearray(self._z.decompress(self._f.read(length)))
    self._origBufferLength = len(self._buffer)

  def readByte(self):
    '''Return the next decompressed byte as an integer.

    Raises NoMoreData once the decompressed buffer is exhausted.'''
    if self._numBytesConsumed >= self._origBufferLength:
      raise NoMoreData("Unexpected end of data stream!")

    byte = self._buffer[self._numBytesConsumed]
    self._numBytesConsumed += 1
    return byte

  def tell(self):
    '''Return the number of decompressed bytes consumed so far.'''
    return self._numBytesConsumed

  def advance(self, numBytes):
    '''Consume numBytes decompressed bytes.

    Raises NoMoreData if fewer than numBytes bytes remain.'''
    while numBytes > 0:
      self.readByte()
      numBytes -= 1

  def clone(self):
    '''Return a plain ByteStream over the underlying (compressed) file,
    positioned at that file's current offset.'''
    if hasattr(self._f, 'clone'):
      newFileObj = self._f.clone()
    else:
      newFileObj = open(self._f.name, 'rb')
    newFileObj.seek(self._f.tell())
    return ByteStream(newFileObj)

  # Unlike ByteStream, this class cannot behave as a file: the
  # decompressed data has no file position to read from or seek to.

  def read(self, *args, **kwargs):
    raise NotImplementedError("read() is not supported on a ZlibByteStream")

  def seek(self, *args, **kwargs):
    raise NotImplementedError("seek() is not supported on a ZlibByteStream")
def getVarint(byteStream):
  '''Grabs a variable sized int from a bitstream (meaning this function
  doesn't seek).

  Bytes are obtained with byteStream.readByte().  Each byte contributes
  its low seven bits, most significant group first; a byte whose high
  bit is clear ends the number.'''

  # A plain integer is enough here: Python promotes it automatically
  # when the value grows, so no explicit long() is needed.
  value = 0
  while True:
    byte = byteStream.readByte()
    value = (value << 7) | (byte & 0x7F)
    if not (byte & 0x80):
      return value
# Names of the three svndiff instruction kinds, as stored in
# SvndiffInstruction.type.
(INSTR_COPY_SOURCE,
 INSTR_COPY_TARGET,
 INSTR_COPY_DATA) = ('copy-source', 'copy-target', 'copy-data')
class SvndiffInstruction(object):
  '''One instruction decoded from an svndiff instruction stream.

  The top two bits of the first byte select the instruction type and
  the low six bits hold the length (zero meaning that the length
  follows as a varint).  Copy-source and copy-target instructions are
  followed by a varint offset.'''

  def __init__(self, byteStream):
    self.instrOffset = byteStream.tell()

    firstByte = byteStream.readByte()
    selector = (firstByte >> 6) & 3

    if selector == 3:
      raise InvalidInstruction(
        "Invalid instruction found at offset %d (%02X)" % (self.instrOffset,
                                                           firstByte),
        self.instrOffset)

    self.type = (INSTR_COPY_SOURCE,
                 INSTR_COPY_TARGET,
                 INSTR_COPY_DATA)[selector]

    # A zero in the low six bits means the length is coded as a varint
    # following the current byte.
    self.length = (firstByte & 0x3F) or getVarint(byteStream)

    isSource = (self.type == INSTR_COPY_SOURCE)
    isTarget = (self.type == INSTR_COPY_TARGET)
    isData = (self.type == INSTR_COPY_DATA)

    # Only the two copy-from-a-view instructions carry an offset.
    if isSource or isTarget:
      self.offset = getVarint(byteStream)

    self.sourceOffset = 0
    self.targetOffset = 0
    if isSource:
      self.sourceOffset = self.offset
    if isTarget:
      self.targetOffset = self.offset

    # The number of bytes consumed in the source stream, target stream,
    # and the data stream; exactly one of them is non-zero.
    self.sourceLength = 0
    self.targetLength = 0
    self.dataLength = 0
    if isSource:
      self.sourceLength = self.length
    if isTarget:
      self.targetLength = self.length
    if isData:
      self.dataLength = self.length

    self.instrLength = byteStream.tell() - self.instrOffset

  def __repr__(self):
    fields = (self.type, self.sourceOffset, self.sourceLength,
              self.targetOffset, self.targetLength, self.dataLength,
              self.instrOffset, self.instrLength)
    return '<SvndiffInstruction %s so:%d sl:%d to: %d tl:%d dl:%d (%d, %d)>' % fields
class Window(object):
  '''One window of an svndiff stream.

  The constructor parses the window header (five varints: source
  offset, source length, target length, instruction length and data
  length) from byteStream and prepares two streams, one positioned at
  the instructions and one at the new data.  For svndiff version 1 it
  also detects zlib-compressed areas and wraps them in a
  ZlibByteStream.  verify() then walks the instructions and checks the
  lengths announced by the header.

  Raises InvalidSvndiffVersion, InvalidWindow or InvalidCompressedStream
  from the constructor.'''

  def __init__(self, byteStream, svndiffVersion):
    if svndiffVersion not in [0, 1]:
      raise InvalidSvndiffVersion(
        "Invalid svndiff version %d" % svndiffVersion)

    # Record the initial offset of the window
    self.windowOffset = byteStream.tell()

    try:
      self.sourceOffset = getVarint(byteStream)
      self.sourceLength = getVarint(byteStream)
      self.targetLength = getVarint(byteStream)
      self.instrLength = getVarint(byteStream)
      self.dataLength = getVarint(byteStream)
      self.windowHeaderLength = byteStream.tell() - self.windowOffset
      self.windowLength = \
        self.windowHeaderLength + self.instrLength + self.dataLength

      # Store the byte stream, and clone it for use as a data stream.
      self.instrByteStream = byteStream
      self.dataByteStream = byteStream.clone()

      # Advance the data stream past the instructions to the start of the data.
      self.dataByteStream.advance(self.instrLength)
    except Exception:
      # Any failure while reading the header means it cannot be trusted.
      # (Exception, not a bare except, so Ctrl-C is not reported as a
      # corrupt window.)
      raise self._corruptHeader()

    # In svndiff1, the instruction area starts with a varint-encoded length.
    # If this length matches the one encoded in the header, then there is no
    # compression.  If it differs, then the stream is compressed with zlib.

    self.origInstrStream = self.instrByteStream
    self.origDataStream = self.dataByteStream
    self.isInstrCompressed = False
    self.isDataCompressed = False
    self.compressedInstrLength = self.instrLength
    self.compressedDataLength = self.dataLength

    if svndiffVersion == 1:
      try:
        offset = self.instrByteStream.tell()
        encodedInstrLength = getVarint(self.instrByteStream)
        instrIntSize = self.instrByteStream.tell() - offset

        offset = self.dataByteStream.tell()
        encodedDataLength = getVarint(self.dataByteStream)
        dataIntSize = self.dataByteStream.tell() - offset

        self.instrLength = encodedInstrLength
        self.dataLength = encodedDataLength
      except Exception:
        raise self._corruptHeader()

      # Now, we need to make a determination about whether the data and
      # instructions are compressed.  If they are, we need to zlib decompress
      # them.  We do that by creating another stream and that will decompress
      # the data on the fly.
      try:
        offset = self.instrByteStream.tell()
        if self.compressedInstrLength - instrIntSize != self.instrLength:
          self.origInstrStream = self.instrByteStream
          self.instrByteStream = ZlibByteStream(self.origInstrStream,
                                                self.compressedInstrLength)
          self.isInstrCompressed = True
      except Exception as e:
        new_e = InvalidCompressedStream(
          "Invalid compressed instr stream at offset %d (%s)" % (offset,
                                                                 str(e)),
          offset)
        new_e.windowOffset = self.windowOffset
        raise new_e

      try:
        offset = self.dataByteStream.tell()
        if self.compressedDataLength - dataIntSize != self.dataLength:
          self.origDataStream = self.dataByteStream
          self.dataByteStream = ZlibByteStream(self.origDataStream,
                                               self.compressedDataLength)
          self.isDataCompressed = True
      except Exception as e:
        new_e = InvalidCompressedStream(
          "Invalid compressed data stream at offset %d (%s)" % (offset,
                                                                str(e)),
          offset)
        new_e.windowOffset = self.windowOffset
        raise new_e

  def _invalidWindow(self, message):
    '''Build an InvalidWindow for this window carrying its offset.'''
    e = InvalidWindow(message, self.windowOffset)
    e.windowOffset = self.windowOffset
    return e

  def _corruptHeader(self):
    '''Build the InvalidWindow used for an unreadable window header.'''
    return self._invalidWindow(
      "The window header at offset %d appears to be corrupted" % \
        (self.windowOffset))

  def verify(self):
    '''Decode every instruction of the window and check that the
    instruction, data and target byte counts match the window header.

    Raises InvalidWindow on a mismatch; a PotentiallyFixableException
    raised while decoding an instruction is annotated with this window
    and re-raised.  On success the original stream is left positioned
    just past the window.'''
    expectedInstrLength = self.instrLength
    expectedDataLength = self.dataLength
    expectedTargetLength = self.targetLength

    computedInstrLength = 0
    computedDataLength = 0
    computedTargetLength = 0

    if expectedInstrLength == 0:
      raise self._invalidWindow(
        "Corrupt window (at offset %d) has 0 instructions?!" % self.windowOffset)

    while computedInstrLength < expectedInstrLength:
      try:
        instr = SvndiffInstruction(self.instrByteStream)
      except PotentiallyFixableException as e:
        e.window = self
        e.windowOffset = self.windowOffset
        raise

      log(LOG_INSTRUCTIONS, 4, repr(instr))

      computedInstrLength += instr.instrLength
      computedDataLength += instr.dataLength
      computedTargetLength += \
        instr.targetLength + instr.sourceLength + instr.dataLength

    if computedInstrLength != expectedInstrLength:
      raise self._invalidWindow(
        "The number of instruction bytes consumed (%d) doesn't match the expected number (%d)" % \
          (computedInstrLength, expectedInstrLength))

    if computedDataLength != expectedDataLength:
      raise self._invalidWindow(
        "The number of data bytes consumed (%d) doesn't match the expected number (%d)" % \
          (computedDataLength, expectedDataLength))

    if computedTargetLength != expectedTargetLength:
      raise self._invalidWindow(
        "The number of target bytes consumed (%d) doesn't match the expected number (%d)" % \
          (computedTargetLength, expectedTargetLength))

    # It appears that the source length specified in the window, isn't exactly
    # equal to what gets consumed.  I suspect that's because the algorithm is
    # using different offsets within the window, and one offset/length pair
    # will reach the end of the window.  However, this hasn't shown to be a
    # clear indicator of corruption, so the source length is deliberately
    # not checked.

    # Advance past the data.  We do this using seek because we might have
    # read a few bytes from the stream if it potentially had compressed data
    self.origInstrStream.seek(self.windowOffset + self.windowLength)

  def __repr__(self):
    if hasattr(self, 'compressedInstrLength'):
      compressed = 'cil: %d cdl: %d ' % (self.compressedInstrLength,
                                         self.compressedDataLength)
    else:
      compressed = ''

    return "<Window wo:%d so:%d sl:%d tl:%d %sil:%d dl:%d whl:%d wl:%d>" % (
      self.windowOffset, self.sourceOffset, self.sourceLength,
      self.targetLength, compressed, self.instrLength, self.dataLength,
      self.windowHeaderLength, self.windowLength)
class Svndiff(object):
  '''An svndiff stream starting at the current position of a file.

  The constructor reads and checks the four byte header ('SVN' followed
  by a version byte, which must be 0 or 1).  length is the size of the
  whole stream including that header.

  Raises EOFError if the header is truncated, InvalidSvndiffHeader if
  the signature is wrong and InvalidSvndiffVersion for an unknown
  version.'''

  def __init__(self, fileobj, length):
    self._f = fileobj
    self.startingOffset = self._f.tell()

    header = self._f.read(4)
    if len(header) != 4:
      raise EOFError(
        "Unexpected end of file while reading svndiff header at offset %d" % \
          (self._f.tell()))

    # The file is read in binary mode, so compare against a bytes
    # literal (which is an ordinary str on Python 2).
    if header[0:3] != b'SVN':
      raise InvalidSvndiffHeader("Invalid svndiff header at offset %d" % \
                                   (self.startingOffset))

    # Slice rather than index so that ord() always gets a 1-byte string.
    self.version = ord(header[3:4])
    if self.version not in [0, 1]:
      raise InvalidSvndiffVersion("Invalid svndiff version %d" % self.version)

    # Number of bytes that remain after the header, i.e. the windows.
    self._length = length - 4

  def verify(self):
    '''Parse and verify every window in the stream.

    A PotentiallyFixableException raised by a window is annotated with
    the starting offset of this stream (svndiffStart) and re-raised.'''
    self._f.seek(self.startingOffset+4)

    bs = ByteStream(self._f)

    log(LOG_SVNDIFF, 2, "<Svndiff so: %d ver: %d>", self.startingOffset,
        self.version)

    try:
      remaining = self._length
      while remaining > 0:
        w = Window(bs, self.version)
        log(LOG_WINDOWS, 3, repr(w))
        w.verify()
        remaining -= w.windowLength
    except PotentiallyFixableException as e:
      e.svndiffStart = self.startingOffset
      raise
def getDirHash(f):
  '''Read a PLAIN directory representation from f.

  f must be positioned at the 'PLAIN' header line.  Entries follow as
  "K <length>" / name / "V <length>" / "<type> <node-id>" line groups
  and are terminated by an 'END' line.

  Returns a dictionary mapping each entry name to a two element list
  [NodeType, NodeId].

  Raises ValueError if the representation is not PLAIN and DataCorrupt
  if an entry is malformed or the file ends before 'END'.'''
  l = f.readline()
  if l != 'PLAIN\n':
    raise ValueError("Expected a PLAIN representation (%d)" % f.tell())

  entries = {}

  while True:
    keyHeader = f.readline()[:-1]
    if keyHeader == 'END':
      break
    # An explicit check (rather than an assert) so that it survives -O
    # and also catches the empty string returned at end of file.
    if not keyHeader.startswith('K'):
      raise DataCorrupt(
        "Expected a key header in directory listing near offset %d" % f.tell())
    length = int(keyHeader.split(' ')[1])
    name = f.readline()[:length]

    valueHeader = f.readline()[:-1]
    if not valueHeader.startswith('V'):
      raise DataCorrupt(
        "Expected a value header in directory listing near offset %d" % f.tell())
    length = int(valueHeader.split(' ')[1])
    value = f.readline()[:length]

    (nodeType, nodeId) = value.split(' ')
    entries[name] = [NodeType(nodeType), NodeId(nodeId)]

  return entries
class Rep(object):
  '''A representation referenced from a noderev ("text:" or "props:").

  type        -- 'text' or 'prop', used only for display
  rev         -- revision whose file holds the representation
  offset      -- offset of the representation header in that file
  length      -- number of bytes stored after the header
  size        -- size field as given in the noderev
  digest      -- hex digest recorded in the noderev
  contentType -- the five bytes found at offset ('PLAIN' or 'DELTA'
                 when intact), or None if the representation lives in
                 another revision
  currentRev  -- revision of the file being checked
  noderev     -- the NodeRev that refers to this representation
  '''

  def __init__(self, type, rev, offset, length, size, digest,
               contentType, currentRev, noderev):
    self.type = type
    self.rev = int(rev)
    self.offset = int(offset)
    self.length = int(length)
    self.size = int(size)

    self.digest = digest
    self.currentRev = currentRev

    self.contentType = contentType
    self.noderev = noderev

  def __repr__(self):
    if not self.contentType:
      contentType = 'UNKNOWN'
    elif self.contentType not in ['PLAIN', 'DELTA', None]:
      contentType = 'INVALID'
    else:
      contentType = self.contentType
    return '%s: %s %d %d %d %d %s' % (self.type, contentType, self.rev,
                                      self.offset, self.length, self.size,
                                      self.digest)

  def verify(self, f, dumpInstructions, dumpWindows):
    '''Check the representation stored in the open revision file f.

    A DELTA representation has its svndiff stream verified; a PLAIN one
    has its MD5 digest compared with the recorded digest.  Either form
    must be followed by an ENDREP line.  Representations that live in
    another revision are skipped with a note on stderr.

    Raises InvalidRepHeader, FsfsVerifyException or DataCorrupt; any
    exception from the svndiff check is annotated with rep and noderev
    attributes and re-raised.'''
    if self.contentType not in ['PLAIN', 'DELTA', None]:
      e = InvalidRepHeader("Invalid rep header found at %d (%s)!" % \
                             (self.offset, self.contentType),
                           self.offset)
      e.rep = self
      e.noderev = self.noderev
      raise e

    # Compare against the revision this object was built for, not
    # against whatever module-level variable happens to exist.
    if self.rev != self.currentRev:
      sys.stderr.write(
        "Skipping text rep since it isn't present in the current rev\n")
      return

    f.seek(self.offset)
    header = f.read(5)
    if header != self.contentType:
      raise FsfsVerifyException(
        "Invalid rep header found at %d (%s, %s)!" % (self.offset, header,
                                                      self.contentType))

    if header == 'DELTA':
      # Consume the rest of the DELTA header
      while True:
        c = f.read(1)
        if c == '\n':
          break
        if c == '':
          # Without this check a truncated file would loop forever.
          raise DataCorrupt(
            "Unexpected end of file in DELTA header at %d" % self.offset)

      # This should be the start of the svndiff stream
      try:
        svndiff = Svndiff(f, self.length)
        svndiff.verify()
      except Exception as e:
        e.rep = self
        e.noderev = self.noderev
        raise
      # The svndiff check does not reconstruct the text, so there is no
      # digest to compare for DELTA representations.
    else:
      if f.read(1) != '\n':
        raise DataCorrupt("Expected a '\\n' after PLAIN")

      # hashlib supersedes the deprecated md5 module; fall back for
      # interpreters that predate it.
      try:
        from hashlib import md5
      except ImportError:
        from md5 import new as md5
      m = md5()
      m.update(f.read(self.length))

      if self.digest and self.digest != NULL_DIGEST \
         and self.digest != m.hexdigest():
        raise DataCorrupt(
          "PLAIN data is corrupted. Expected digest '%s', computed '%s'." % (
            self.digest, m.hexdigest()))

    if f.read(7) != 'ENDREP\n':
      raise DataCorrupt("Terminating ENDREP missing!")
class TextRep(Rep):
  '''The representation named by a noderev's "text:" line.'''

  def __init__(self, rev, offset, length, size, digest,
               contentType, currentRev, noderev):
    Rep.__init__(self, 'text', rev, offset, length, size,
                 digest, contentType, currentRev, noderev)
class PropRep(Rep):
  '''The representation named by a noderev's "props:" line.'''

  def __init__(self, rev, offset, length, size, digest,
               contentType, currentRev, noderev):
    Rep.__init__(self, 'prop', rev, offset, length, size,
                 digest, contentType, currentRev, noderev)
class NodeId(object):
  '''A node revision id of the form "<a>.<b>.r<rev>/<offset>".

  txn_name holds the part before the slash, offset the integer after
  it, and rev the integer taken from the third dot-separated component
  of txn_name (minus its leading character).

  Instances compare equal to their string form, so a NodeId can be
  compared directly with a string given on the command line.'''

  def __init__(self, nodeid):
    (self.txn_name, offset) = nodeid.split('/')
    self.offset = int(offset)
    self.rev = int(self.txn_name.split('.')[2][1:])

  def __repr__(self):
    return self.txn_name + '/%d' % self.offset

  def __eq__(self, other):
    return bool(repr(self) == other)

  def __ne__(self, other):
    # Python 2 does not derive != from ==, so spell it out.
    return not self.__eq__(other)

  def __hash__(self):
    # Keep hashing consistent with equality to the string form.
    return hash(repr(self))
class NodeType(object):
  '''The kind of a node: either 'file' or 'dir'.

  Raises ValueError for any other value.'''

  def __init__(self, t):
    if t not in ('file', 'dir'):
      raise ValueError('Invalid Node type received: "%s"' % t)
    self.type = t

  def __repr__(self):
    return self.type
class NodeRev(object):
  '''A node revision parsed from the header lines at the current
  position of f.

  The header is a run of "field: value" lines ended by an empty line.
  Recognised fields are id, type, pred, text, props, cpath, copyroot
  and copyfrom; anything else is ignored.  For a directory whose text
  representation is stored in the node's own revision, the directory
  entries are loaded into the dir attribute.

  Raises IOError if the file ends inside the header and DataCorrupt if
  the header has no id or no type line.'''

  def __init__(self, f, currentRev):
    self.id = None
    self.type = None
    self.pred = None
    self.text = None
    self.props = None
    self.cpath = None
    self.copyroot = None
    self.copyfrom = None
    self.dir = []

    self.nodeOffset = f.tell()

    while True:
      line = f.readline()
      if line == '':
        raise IOError("Unexpected end of file")
      if line == '\n':
        break

      # break apart the line
      try:
        (field, value) = line.split(':', 1)
      except ValueError:
        # Show where the unparsable line was found before giving up.
        print(repr(line))
        print(self.nodeOffset)
        print(f.tell())
        raise

      # pull off the leading space and trailing new line
      value = value[1:-1]

      if field == 'id':
        self.id = NodeId(value)
      elif field == 'type':
        self.type = NodeType(value)
      elif field == 'pred':
        self.pred = NodeId(value)
      elif field == 'text':
        self.text = self._parseRep(TextRep, f, value, currentRev)
      elif field == 'props':
        self.props = self._parseRep(PropRep, f, value, currentRev)
      elif field == 'cpath':
        self.cpath = value
      elif field == 'copyroot':
        self.copyroot = value
      elif field == 'copyfrom':
        self.copyfrom = value

    if self.id is None or self.type is None:
      raise DataCorrupt(
        "Node revision at offset %d has no id or type" % self.nodeOffset)

    if self.type.type == 'dir':
      if self.text:
        if self.id.rev == self.text.rev:
          offset = f.tell()
          f.seek(self.text.offset)
          self.dir = getDirHash(f)
          f.seek(offset)
        else:
          # The directory entries are stored in another file.
          print("Warning: dir entries are stored in rev %d for noderev %s" % (
            self.text.rev, repr(self.id)))

  def _parseRep(self, repClass, f, value, currentRev):
    '''Build a repClass instance from a "rev offset length size digest"
    value.

    When the representation lives in currentRev, the five bytes at its
    offset are peeked at to learn the content type; the position of f
    is restored afterwards.'''
    (rev, offset, length, size, digest) = value.split(' ')
    rev = int(rev)
    offset = int(offset)
    length = int(length)
    size = int(size)

    if rev != currentRev:
      contentType = None
    else:
      savedOffset = f.tell()
      f.seek(offset)
      contentType = f.read(5)
      f.seek(savedOffset)

    return repClass(rev, offset, length, size, digest,
                    contentType, currentRev, self)

  def __repr__(self):
    parts = ['NodeRev Id: %s\n type: %s\n' % (repr(self.id), repr(self.type))]
    if self.pred:
      parts.append(' pred: %s\n' % repr(self.pred))
    if self.text:
      parts.append(' %s\n' % repr(self.text))
    if self.props:
      parts.append(' %s\n' % repr(self.props))
    if self.cpath:
      parts.append(' cpath: %s\n' % self.cpath)
    if self.copyroot:
      parts.append(' copyroot: %s\n' % self.copyroot)
    if self.copyfrom:
      parts.append(' copyfrom: %s\n' % self.copyfrom)
    if self.dir:
      parts.append(' dir contents:\n')
      for k in self.dir:
        parts.append(' %s: %s\n' % (k, self.dir[k]))
    # Drop the final newline.
    return ''.join(parts)[:-1]
class ChangedPaths(object):
  '''The changed path records read from the current position of f.

  Each record is a line "id action text-mod prop-mod path" followed by
  a copyfrom line that is either empty or "rev path".  Reading stops at
  an empty line or at end of file.

  changedPaths maps each path to the tuple
  (id, action, textMod, propMod, copyfromRev, copyfromPath), where
  copyfromRev is -1 and copyfromPath is '' when there is no copyfrom
  information.  Iterating over the object yields (path, tuple) pairs.

  Raises DataCorrupt if a record has fewer than four leading fields.'''

  def __init__(self, f):
    self.changedPaths = {}

    while True:
      currentOffset = f.tell()
      line = f.readline()
      if line == '\n' or line == '':
        break

      record = line[:-1]
      try:
        (nodeId, action, textMod, propMod) = record.split(' ')[:4]
      except ValueError:
        raise DataCorrupt(
          "Data appears to be corrupt at offset %d" % currentOffset)
      # The path is whatever follows the four fields; it may itself
      # contain spaces, so it cannot simply be split off.
      path = record[len(' '.join([nodeId, action, textMod, propMod]))+1:]

      copyfrom = f.readline()
      if copyfrom not in ('\n', ''):
        (copyfromRev, copyfromPath) = copyfrom[:-1].split(' ', 1)
      else:
        copyfromRev = -1
        copyfromPath = ''

      self.changedPaths[path] = (nodeId, action, textMod, propMod,
                                 copyfromRev, copyfromPath)

  def __iter__(self):
    return iter(self.changedPaths.items())
def getRootAndChangedPaths(revFile):
  '''Return the two integers stored on the last line of revFile as the
  tuple (rootNode, changedPaths).

  revFile must be opened in binary mode and end with a newline; the
  file is scanned backwards from its end for the newline that precedes
  the last line.'''
  position = -2
  while True:
    revFile.seek(position, 2)
    # b'\n' is an ordinary str on Python 2.
    if revFile.read(1) == b'\n':
      break
    position -= 1

  # The read above left the file at the start of the last line.
  fields = revFile.readline().split(b' ')
  (rootNode, changedPaths) = [int(field) for field in fields]

  return (rootNode, changedPaths)
def dumpChangedPaths(changedPaths):
  '''Print the changed path records to stdout.

  changedPaths is an iterable of (path, (id, action, textMod, propMod,
  copyfromRev, copyfromPath)) pairs; copyfrom details are shown only
  when copyfromRev is not -1.  A blank line follows each record.'''
  print("Changed Path Information:")
  for (path, details) in changedPaths:
    (nodeId, action, textMod, propMod, copyfromRev, copyfromPath) = details

    lines = [" %s:" % path,
             " id: %s" % nodeId,
             " action: %s" % action,
             " text mod: %s" % textMod,
             " prop mod: %s" % propMod]
    if copyfromRev != -1:
      lines.append(" copyfrom path: %s" % copyfromPath)
      lines.append(" copyfrom rev: %s" % copyfromRev)
    # The empty last element produces the blank separator line.
    lines.append("")
    print("\n".join(lines))
class WalkStrategy(object):
  '''Base class for the ways of enumerating the noderevs of a rev file.

  Subclasses implement _nodeWalker(), which returns an iterator of
  NodeRev objects.  Iterating over a strategy rewinds its file to the
  root offset and delegates to _nodeWalker().'''

  def __init__(self, filename, rootOffset, currentRev):
    self.f = open(filename, 'rb')
    self.f.seek(rootOffset)
    self.rootOffset = rootOffset
    self.currentRev = currentRev

  def _nodeWalker(self):
    raise NotImplementedError("_nodeWalker is not implemented")

  def __iter__(self):
    self.f.seek(self.rootOffset)
    return self._nodeWalker()
class ClassicStrategy(WalkStrategy):
  '''Walks the tree from the noderev at the current file position,
  descending into directory entries that belong to the same revision.'''

  def _nodeWalker (self):
    noderev = NodeRev(self.f, self.currentRev)
    yield noderev

    if noderev.type.type != 'dir':
      return

    for name in noderev.dir:
      childId = noderev.dir[name][1]
      # Entries from other revisions live in other files; skip them.
      if childId.rev == noderev.id.rev:
        self.f.seek(childId.offset)
        for child in self._nodeWalker():
          yield child
class RegexpStrategy(WalkStrategy):
  '''Finds noderevs by scanning the whole file for lines that look
  like "id: ..." instead of following the tree from the root.'''

  def __init__(self, filename, rootOffset, currentRev):
    WalkStrategy.__init__(self, filename, rootOffset, currentRev)

    # File object passed to the NodeRev() constructor so that it
    # doesn't interfere with our regex search.
    self.nodeFile = open(filename, 'rb')

  def _nodeWalker(self):
    nodeId_re = re.compile(r'^id: [a-z0-9\./]+$')

    self.f.seek(0)
    lineStart = 0

    for line in self.f:
      if nodeId_re.search(line):
        # Parse the noderev through the second handle, starting at the
        # line that matched.
        self.nodeFile.seek(lineStart)
        yield NodeRev(self.nodeFile, self.currentRev)

      lineStart += len(line)
def verify(noderev, revFile, dumpInstructions, dumpWindows):
  '''Print noderev, then verify its text representation and, when they
  are stored in the current revision, its properties.  A blank line is
  printed afterwards.'''
  print(noderev)

  text = noderev.text
  if text:
    text.verify(revFile, dumpInstructions, dumpWindows)

  props = noderev.props
  if props and props.rev == props.currentRev:
    props.verify(revFile, dumpInstructions, dumpWindows)

  print("")
def truncate(noderev, revFile):
  '''Replace the text of noderev with an empty PLAIN representation.

  The representation at the text offset is overwritten with
  "PLAIN\\nENDREP\\n", and the noderev's "text:" line is rewritten in
  place with zero length and size fields (same width as before) and
  the MD5 digest of empty input.  revFile must be open for update.

  The process exits with status 0 when done.  Raises
  FsfsVerifyException if the noderev has no text representation, if its
  "text:" line cannot be found, or if the rewritten line would not
  have exactly the length of the original.'''
  txnId = noderev.id

  print("Truncating node %s (%s)" % (txnId, noderev.cpath))

  # Grab the text rep
  textRep = noderev.text
  if textRep is None:
    raise FsfsVerifyException(
      "Node %s has no text representation to truncate" % txnId)

  # Fix the text rep contents
  revFile.seek(textRep.offset, 0)
  revFile.write("PLAIN\x0aENDREP\x0a")

  # Fix the node rev: locate its "text:" line
  revFile.seek(noderev.nodeOffset, 0)
  while True:
    savedOffset = revFile.tell()
    line = revFile.readline()
    if line == '':
      # readline() returns '' forever at end of file; stop looking.
      raise FsfsVerifyException(
        "Couldn't find the text line of node %s" % txnId)
    if line[:4] == 'text':
      break

  fields = line.split(' ')
  overallLength = len(line)

  # fields: 'text:', rev, offset, length, size, digest + newline
  fields[3] = '0' * len(fields[3])
  fields[4] = '0' * len(fields[4])
  fields[5] = 'd41d8cd98f00b204e9800998ecf8427e'
  newTextRep = ' '.join(fields) + '\x0a'

  # The line is overwritten in place, so its length must not change.
  # This is checked explicitly because an assert vanishes under -O.
  if len(newTextRep) != overallLength:
    raise FsfsVerifyException(
      "Cannot rewrite the text line of node %s in place" % txnId)

  revFile.seek(savedOffset, 0)
  revFile.write(newTextRep)
  print("Done.")
  sys.exit(0)
def fixHeader(e, revFile):
  '''Attempt to fix the rep header.  e is expected to be of type
  InvalidRepHeader, since the exception stores the necessary information
  to help repair the file.

  The last rep header line found before e.offset is taken to be the
  real start of the text rep; that header plus the following 4096 bytes
  are copied over the data at e.offset.  revFile must be open for
  update.

  Raises FsfsVerifyException if no rep header precedes e.offset.'''

  # First, we need to locate the real start of the text rep
  textrep_re = re.compile(r'^(DELTA( \d+ \d+ \d+)?|PLAIN)$')

  revFile.seek(0)
  offset = 0
  originalOffset = 0
  headerLen = None
  for line in revFile:
    if textrep_re.match(line):
      if originalOffset <= offset < e.offset:
        originalOffset = offset
        headerLen = len(line)
    offset = offset + len(line)

  if headerLen is None:
    # Nothing to copy from; do not touch the file.
    raise FsfsVerifyException(
      "Couldn't find a rep header before offset %d" % e.offset)

  print("Original text rep located at %d" % originalOffset)

  # Okay, now we have the original offset of the text rep that was
  # in the process of being written out.  The header portion of the
  # text rep has a fsync() done after it, so the 4K blocks actually
  # start after the header.  We need to make sure to copy the header
  # and the next 4K, to be on the safe side.
  copyLen = 4096 + headerLen

  revFile.seek(originalOffset)
  block = revFile.read(copyLen)
  print("Copy %d bytes from offset %d" % (copyLen, originalOffset))

  print("Write %d bytes at offset %d" % (copyLen, e.offset))
  revFile.seek(e.offset)
  revFile.write(block)
  revFile.flush()

  print("Fixed? :-) Re-run fsfsverify without the -f option")
def fixStream(e, revFile):
  '''Attempt to repair an svndiff stream in which a block was repeated.

  e must carry svndiffStart (offset of the svndiff stream) and
  windowOffset (offset of the window where the error was noticed).
  Everything from the error offset up to and including the next line
  ending in ENDREP is moved back to the preceding 4096 byte boundary,
  counted from the start of the stream.  revFile must be open for
  update and is closed when the function returns.

  Raises FsfsVerifyException if no ENDREP line follows the error.'''
  startOffset = e.svndiffStart
  errorOffset = e.windowOffset

  repeatedBlockOffset = errorOffset - ((errorOffset - startOffset) % 4096)

  # Now we need to move up the rest of the rep

  # Determine the final offset by finding the end of the rep.
  revFile.seek(errorOffset)

  endrep_re = re.compile(".*ENDREP$")
  srcLength = 0
  foundEnd = False
  for l in revFile:
    srcLength += len(l)
    if endrep_re.match(l):
      foundEnd = True
      break

  if not foundEnd:
    raise FsfsVerifyException("Couldn't find end of rep!")

  finalOffset = errorOffset + srcLength
  srcOffset = errorOffset
  destOffset = repeatedBlockOffset

  print("Copy %d bytes from offset %d" % (srcLength, srcOffset))
  print("Write %d bytes at offset %d" % (srcLength, destOffset))

  # Move the data in chunks of at most 64K.  The destination lies
  # before the source, so copying front to back is safe.
  while srcOffset < finalOffset:
    blen = min(64*1024, finalOffset - srcOffset)
    revFile.seek(srcOffset)
    block = revFile.read(blen)
    revFile.seek(destOffset)
    revFile.write(block)

    srcOffset += blen
    destOffset += blen

  revFile.flush()
  revFile.close()

  print("Fixed? :-) Re-run fsfsverify without the -f option")
def checkOptions(options):
  '''Check the parsed command line options for conflicts.

  At most one of -c, -f and -t may be given; otherwise a message is
  written to stderr and the process exits with status 1.  When -c or
  --no-verify is combined with -w or -i, a warning is written and the
  -w/-i settings are cleared on options.'''
  exclusive = ('dumpChanged', 'truncate', 'fixRlle')
  count = len([name for name in exclusive if getattr(options, name, None)])

  if count > 1:
    sys.stderr.write("Please use only one of -c, -f, and -t.\n")
    sys.exit(1)

  if options.dumpChanged and (options.dumpWindows or options.dumpInstructions):
    sys.stderr.write(
      "-c is incompatible with -w and -i. Dropping -w and/or -i.\n")
    # Actually drop them, as the message promises.
    options.dumpWindows = False
    options.dumpInstructions = False

  if options.noVerify and (options.dumpWindows or options.dumpInstructions):
    sys.stderr.write(
      "--no-verify is incompatible with -w and -i. Dropping -w and/or -i.\n")
    options.dumpWindows = False
    options.dumpInstructions = False
def handleError(error, withTraceback=False):
  '''Report error on stderr and exit with status 1.

  error         -- the exception to report
  withTraceback -- also print the traceback of the exception that is
                   currently being handled
  '''
  print("")
  if withTraceback:
    import traceback
    traceback.print_exc()

  # Report the exception we were given, not whatever global happens to
  # be called e.
  sys.stderr.write("Error %s: %s\n" % (error.__class__.__name__, str(error)))
  sys.stderr.write("Try running with -f to fix the revision\n")
  sys.exit(1)
# Script entry point: parse the command line, then either dump the
# changed paths, truncate one noderev, or walk and verify every noderev
# of the given revision file (repairing it when -f is given).
if __name__ == '__main__':
  from optparse import OptionParser

  parser = OptionParser("usage: %prog [-w | -i | -r | -n] REV-FILE")
  parser.add_option("-c", "--changed-paths",
                    action="store_true", dest="dumpChanged",
                    help="Dump changed path information", default=False)
  parser.add_option("", "--no-verify",
                    action="store_true", dest="noVerify",
                    help="Don't parse svndiff streams.", default=False)
  parser.add_option("-i", "--instructions",
                    action="store_true", dest="dumpInstructions",
                    help="Dump instructions (implies -w)", default=False)
  parser.add_option("-w", "--windows",
                    action="store_true", dest="dumpWindows",
                    help="Dump windows", default=False)
  parser.add_option("-n", "--noderev-regexp",
                    action="store_true", dest="noderevRegexp",
                    help="Find all noderevs using a regexp", default=False)
  parser.add_option("-f", "--fix-read-length-line-error",
                    action="store_true", dest="fixRlle",
                    help="Attempt to fix the read length line error",
                    default=False)
  parser.add_option("-t", "--truncate",
                    action="store", type="string", dest="truncate",
                    help="Truncate the specified node rev.",
                    default=None)
  parser.add_option("", "--traceback",
                    action="store_true", dest="showTraceback",
                    help="Show error tracebacks (mainly used for debugging).",
                    default=False)

  (options, args) = parser.parse_args()

  if len(args) != 1:
    sys.stderr.write("Please specify exactly one rev file.\n")
    parser.print_help()
    sys.exit(1)

  checkOptions(options)

  filename = args[0]

  if options.dumpInstructions:
    options.dumpWindows = True
    LOG_MASK |= LOG_INSTRUCTIONS

  if options.dumpWindows:
    LOG_MASK |= LOG_WINDOWS

  # The repair modes write to the file; everything else only reads it.
  if options.truncate or options.fixRlle:
    revFile = open(filename, 'r+b')
  else:
    revFile = open(filename, 'rb')

  (root, changed) = getRootAndChangedPaths(revFile)

  if options.dumpChanged:
    revFile.seek(changed)
    changedPaths = ChangedPaths(revFile)

    dumpChangedPaths(changedPaths)
    sys.exit(0)

  # The revision number is taken from the leading digits of the file name.
  match = re.match('([0-9]+)', os.path.basename(filename))
  if match is None:
    sys.stderr.write(
      "The file name must start with a decimal number that indicates the revision\n")
    sys.exit(1)
  currentRev = int(match.group(1), 10)

  if options.noderevRegexp:
    strategy = RegexpStrategy(filename, root, currentRev)
  else:
    strategy = ClassicStrategy(filename, root, currentRev)

  # Make stderr the same as stdout.  This helps when trying to catch all of the
  # output from a run.
  sys.stderr = sys.stdout

  try:
    for noderev in strategy:
      try:
        if options.truncate:
          # Check to see if this is the rev we need to truncate
          if options.truncate == noderev.id:
            truncate(noderev, revFile)

        else:
          print(noderev)

          if not options.noVerify:
            if noderev.text:
              noderev.text.verify(revFile,
                                  options.dumpInstructions,
                                  options.dumpWindows)

            if noderev.props and noderev.props.rev == noderev.props.currentRev:
              noderev.props.verify(revFile,
                                   options.dumpInstructions,
                                   options.dumpWindows)

          print("")
      except:
        # Deliberately catches everything: the output written so far is
        # flushed before the exception is passed on unchanged.
        sys.stdout.flush()
        raise
  except InvalidRepHeader as e:
    # handleError() exits, so the fix is only reached with -f.
    if not options.fixRlle:
      handleError(e, options.showTraceback)

    fixHeader(e, revFile)

  except PotentiallyFixableException as e:
    if not options.fixRlle:
      handleError(e, options.showTraceback)

    fixStream(e, revFile)