1 <?xml version=
"1.0" encoding=
"UTF-8"?>
2 <!DOCTYPE html PUBLIC
"-//W3C//DTD XHTML 1.1//EN"
3 "http://www.w3.org/TR/xhtml11/DTD/xhtml11.dtd">
4 <html xmlns=
"http://www.w3.org/1999/xhtml" xml:
lang=
"en">
6 <meta http-equiv=
"Content-Type" content=
"application/xhtml+xml; charset=UTF-8" />
7 <meta name=
"generator" content=
"AsciiDoc 10.2.0" />
8 <title>How to recover an object from scratch
</title>
9 <style type=
"text/css">
10 /* Shared CSS for AsciiDoc xhtml11 and html5 backends */
14 font-family: Georgia,serif;
18 h1, h2, h3, h4, h5, h6,
19 div.title, caption.title,
20 thead, p.table.header,
22 #author, #revnumber, #revdate, #revremark,
24 font-family: Arial,Helvetica,sans-serif;
28 margin:
1em
5%
1em
5%;
33 text-decoration: underline;
49 h1, h2, h3, h4, h5, h6 {
57 border-bottom:
2px solid silver;
77 border:
1px solid silver;
88 ul
> li { color: #aaa; }
89 ul
> li
> * { color: black; }
91 .monospaced, code, pre {
92 font-family:
"Courier New", Courier, monospace;
99 white-space: pre-wrap;
109 #revnumber, #revdate, #revremark {
114 border-top:
2px solid silver;
120 padding-bottom:
0.5em;
124 padding-bottom:
0.5em;
129 margin-bottom:
1.5em;
131 div.imageblock, div.exampleblock, div.verseblock,
132 div.quoteblock, div.literalblock, div.listingblock, div.sidebarblock,
133 div.admonitionblock {
135 margin-bottom:
1.5em;
137 div.admonitionblock {
139 margin-bottom:
2.0em;
144 div.content { /* Block element content. */
148 /* Block element titles. */
149 div.title, caption.title {
154 margin-bottom:
0.5em;
160 td div.title:first-child {
163 div.content div.title:first-child {
166 div.content + div.title {
170 div.sidebarblock
> div.content {
172 border:
1px solid #dddddd;
173 border-left:
4px solid #f0f0f0;
177 div.listingblock
> div.content {
178 border:
1px solid #dddddd;
179 border-left:
5px solid #f0f0f0;
184 div.quoteblock, div.verseblock {
188 border-left:
5px solid #f0f0f0;
192 div.quoteblock
> div.attribution {
197 div.verseblock
> pre.content {
198 font-family: inherit;
201 div.verseblock
> div.attribution {
205 /* DEPRECATED: Pre version
8.2.7 verse style literal block. */
206 div.verseblock + div.attribution {
210 div.admonitionblock .icon {
214 text-decoration: underline;
216 padding-right:
0.5em;
218 div.admonitionblock td.content {
220 border-left:
3px solid #dddddd;
223 div.exampleblock
> div.content {
224 border-left:
3px solid #dddddd;
228 div.imageblock div.content { padding-left:
0; }
229 span.image img { border-style: none; vertical-align: text-bottom; }
230 a.image:visited { color: white; }
234 margin-bottom:
0.8em;
247 list-style-position: outside;
250 list-style-type: decimal;
253 list-style-type: lower-alpha;
256 list-style-type: upper-alpha;
259 list-style-type: lower-roman;
262 list-style-type: upper-roman;
265 div.compact ul, div.compact ol,
266 div.compact p, div.compact p,
267 div.compact div, div.compact div {
269 margin-bottom:
0.1em;
281 margin-bottom:
0.8em;
284 padding-bottom:
15px;
286 dt.hdlist1.strong, td.hdlist1.strong {
292 padding-right:
0.8em;
298 div.hdlist.compact tr {
307 .footnote, .footnoteref {
311 span.footnote, span.footnoteref {
312 vertical-align: super;
316 margin:
20px
0 20px
0;
320 #footnotes div.footnote {
326 border-top:
1px solid silver;
335 padding-right:
0.5em;
336 padding-bottom:
0.3em;
344 #footer-badges { display: none; }
348 margin-bottom:
2.5em;
356 margin-bottom:
0.1em;
359 div.toclevel0, div.toclevel1, div.toclevel2, div.toclevel3, div.toclevel4 {
376 span.aqua { color: aqua; }
377 span.black { color: black; }
378 span.blue { color: blue; }
379 span.fuchsia { color: fuchsia; }
380 span.gray { color: gray; }
381 span.green { color: green; }
382 span.lime { color: lime; }
383 span.maroon { color: maroon; }
384 span.navy { color: navy; }
385 span.olive { color: olive; }
386 span.purple { color: purple; }
387 span.red { color: red; }
388 span.silver { color: silver; }
389 span.teal { color: teal; }
390 span.white { color: white; }
391 span.yellow { color: yellow; }
393 span.aqua-background { background: aqua; }
394 span.black-background { background: black; }
395 span.blue-background { background: blue; }
396 span.fuchsia-background { background: fuchsia; }
397 span.gray-background { background: gray; }
398 span.green-background { background: green; }
399 span.lime-background { background: lime; }
400 span.maroon-background { background: maroon; }
401 span.navy-background { background: navy; }
402 span.olive-background { background: olive; }
403 span.purple-background { background: purple; }
404 span.red-background { background: red; }
405 span.silver-background { background: silver; }
406 span.teal-background { background: teal; }
407 span.white-background { background: white; }
408 span.yellow-background { background: yellow; }
410 span.big { font-size:
2em; }
411 span.small { font-size:
0.6em; }
413 span.underline { text-decoration: underline; }
414 span.overline { text-decoration: overline; }
415 span.line-through { text-decoration: line-through; }
417 div.unbreakable { page-break-inside: avoid; }
427 margin-bottom:
1.5em;
429 div.tableblock
> table {
430 border:
3px solid #
527bbd;
432 thead, p.table.header {
439 /* Because the table frame attribute is overridden by CSS in most browsers. */
440 div.tableblock
> table[
frame=
"void"] {
443 div.tableblock
> table[
frame=
"hsides"] {
444 border-left-style: none;
445 border-right-style: none;
447 div.tableblock
> table[
frame=
"vsides"] {
448 border-top-style: none;
449 border-bottom-style: none;
460 margin-bottom:
1.5em;
462 thead, p.tableblock.header {
473 border-color: #
527bbd;
474 border-collapse: collapse;
476 th.tableblock, td.tableblock {
480 border-color: #
527bbd;
483 table.tableblock.frame-topbot {
484 border-left-style: hidden;
485 border-right-style: hidden;
487 table.tableblock.frame-sides {
488 border-top-style: hidden;
489 border-bottom-style: hidden;
491 table.tableblock.frame-none {
492 border-style: hidden;
495 th.tableblock.halign-left, td.tableblock.halign-left {
498 th.tableblock.halign-center, td.tableblock.halign-center {
501 th.tableblock.halign-right, td.tableblock.halign-right {
505 th.tableblock.valign-top, td.tableblock.valign-top {
508 th.tableblock.valign-middle, td.tableblock.valign-middle {
509 vertical-align: middle;
511 th.tableblock.valign-bottom, td.tableblock.valign-bottom {
512 vertical-align: bottom;
523 padding-bottom:
0.5em;
524 border-top:
2px solid silver;
525 border-bottom:
2px solid silver;
530 body.manpage div.sectionbody {
535 body.manpage div#toc { display: none; }
540 <script type=
"text/javascript">
542 var asciidoc = { // Namespace.
544 /////////////////////////////////////////////////////////////////////
545 // Table Of Contents generator
546 /////////////////////////////////////////////////////////////////////
548 /* Author: Mihai Bazon, September
2002
549 * http://students.infoiasi.ro/~mishoo
551 * Table Of Content generator
554 * Feel free to use this script under the terms of the GNU General Public
555 * License, as long as you do not remove or alter this notice.
558 /* modified by Troy D. Hanson, September
2006. License: GPL */
559 /* modified by Stuart Rackham,
2006,
2009. License: GPL */
562 toc: function (toclevels) {
564 function getText(el) {
566 for (var i = el.firstChild; i != null; i = i.nextSibling) {
567 if (i.nodeType ==
3 /* Node.TEXT_NODE */) // IE doesn't speak constants.
569 else if (i.firstChild != null)
575 function TocEntry(el, text, toclevel) {
578 this.toclevel = toclevel;
581 function tocEntries(el, toclevels) {
582 var result = new Array;
583 var re = new RegExp('[hH]([
1-'+(toclevels+
1)+'])');
584 // Function that scans the DOM tree for header elements (the DOM2
585 // nodeIterator API would be a better technique but not supported by all
587 var iterate = function (el) {
588 for (var i = el.firstChild; i != null; i = i.nextSibling) {
589 if (i.nodeType ==
1 /* Node.ELEMENT_NODE */) {
590 var mo = re.exec(i.tagName);
591 if (mo && (i.getAttribute(
"class") || i.getAttribute(
"className")) !=
"float") {
592 result[result.length] = new TocEntry(i, getText(i), mo[
1]-
1);
602 var toc = document.getElementById(
"toc");
607 // Delete existing TOC entries in case we're reloading the TOC.
608 var tocEntriesToRemove = [];
610 for (i =
0; i < toc.childNodes.length; i++) {
611 var entry = toc.childNodes[i];
612 if (entry.nodeName.toLowerCase() == 'div'
613 && entry.getAttribute(
"class")
614 && entry.getAttribute(
"class").match(/^toclevel/))
615 tocEntriesToRemove.push(entry);
617 for (i =
0; i < tocEntriesToRemove.length; i++) {
618 toc.removeChild(tocEntriesToRemove[i]);
621 // Rebuild TOC entries.
622 var entries = tocEntries(document.getElementById(
"content"), toclevels);
623 for (var i =
0; i < entries.length; ++i) {
624 var entry = entries[i];
625 if (entry.element.id ==
"")
626 entry.element.id =
"_toc_" + i;
627 var a = document.createElement(
"a");
628 a.href =
"#" + entry.element.id;
629 a.appendChild(document.createTextNode(entry.text));
630 var div = document.createElement(
"div");
632 div.className =
"toclevel" + entry.toclevel;
633 toc.appendChild(div);
635 if (entries.length ==
0)
636 toc.parentNode.removeChild(toc);
640 /////////////////////////////////////////////////////////////////////
641 // Footnotes generator
642 /////////////////////////////////////////////////////////////////////
644 /* Based on footnote generation code from:
645 * http://www.brandspankingnew.net/archive/
2005/
07/format_footnote.html
648 footnotes: function () {
649 // Delete existing footnote entries in case we're reloading the footnotes.
651 var noteholder = document.getElementById(
"footnotes");
655 var entriesToRemove = [];
656 for (i =
0; i < noteholder.childNodes.length; i++) {
657 var entry = noteholder.childNodes[i];
658 if (entry.nodeName.toLowerCase() == 'div' && entry.getAttribute(
"class") ==
"footnote")
659 entriesToRemove.push(entry);
661 for (i =
0; i < entriesToRemove.length; i++) {
662 noteholder.removeChild(entriesToRemove[i]);
665 // Rebuild footnote entries.
666 var cont = document.getElementById(
"content");
667 var spans = cont.getElementsByTagName(
"span");
670 for (i=
0; i
<spans.length; i++) {
671 if (spans[i].className ==
"footnote") {
673 var note = spans[i].getAttribute(
"data-note");
675 // Use [\s\S] in place of . so multi-line matches work.
676 // Because JavaScript has no s (dotall) regex flag.
677 note = spans[i].innerHTML.match(/\s*\[([\s\S]*)]\s*/)[
1];
679 "[<a id='_footnoteref_" + n +
"' href='#_footnote_" + n +
680 "' title='View footnote' class='footnote'>" + n +
"</a>]";
681 spans[i].setAttribute(
"data-note", note);
683 noteholder.innerHTML +=
684 "<div class='footnote' id='_footnote_" + n +
"'>" +
685 "<a href='#_footnoteref_" + n +
"' title='Return to text'>" +
686 n +
"</a>. " + note +
"</div>";
687 var id =spans[i].getAttribute(
"id");
688 if (id != null) refs[
"#"+id] = n;
692 noteholder.parentNode.removeChild(noteholder);
694 // Process footnoterefs.
695 for (i=
0; i
<spans.length; i++) {
696 if (spans[i].className ==
"footnoteref") {
697 var href = spans[i].getElementsByTagName(
"a")[
0].getAttribute(
"href");
698 href = href.match(/#.*/)[
0]; // Because IE returns the full URL.
701 "[<a href='#_footnote_" + n +
702 "' title='View footnote' class='footnote'>" + n +
"</a>]";
708 install: function(toclevels) {
711 function reinstall() {
712 asciidoc.footnotes();
714 asciidoc.toc(toclevels);
718 function reinstallAndRemoveTimer() {
719 clearInterval(timerId);
723 timerId = setInterval(reinstall,
500);
724 if (document.addEventListener)
725 document.addEventListener(
"DOMContentLoaded", reinstallAndRemoveTimer, false);
727 window.onload = reinstallAndRemoveTimer;
735 <body class=
"article">
737 <h1>How to recover an object from scratch
</h1>
738 <span id=
"revdate">2023-
09-
22</span>
742 <div class=
"sectionbody">
743 <div class=
"paragraph"><p>I was recently presented with a repository with a corrupted packfile,
744 and was asked if the data was recoverable. This post-mortem describes
745 the steps I took to investigate and fix the problem. I thought others
746 might find the process interesting, and it might help somebody in the
747 same situation.
</p></div>
748 <div class=
"sidebarblock">
749 <div class=
"content">
750 <div class=
"paragraph"><p>Note: In this case, no good copy of the repository was available. For
751 the much easier case where you can get the corrupted object from
752 elsewhere, see
<a href=
"recover-corrupted-blob-object.html">this howto
</a>.
</p></div>
754 <div class=
"paragraph"><p>I started with an fsck, which found a problem with exactly one object
755 (I
’ve used $pack and $obj below to keep the output readable, and also
756 because I
’ll refer to them later):
</p></div>
757 <div class=
"listingblock">
758 <div class=
"content">
759 <pre><code> $ git fsck
760 error: $pack SHA1 checksum mismatch
761 error: index CRC mismatch for object $obj from $pack at offset
51653873
762 error: inflate: data stream error (incorrect data check)
763 error: cannot unpack $obj from $pack at offset
51653873</code></pre>
765 <div class=
"paragraph"><p>The pack checksum failing means a byte is munged somewhere, and it is
766 presumably in the object mentioned (since both the index checksum and
767 zlib were failing).
</p></div>
768 <div class=
"paragraph"><p>Reading the zlib source code, I found that
"incorrect data check" means
769 that the adler-
32 checksum at the end of the zlib data did not match the
770 inflated data. So stepping the data through zlib would not help, as it
771 did not fail until the very end, when we realize the checksum does not match.
772 The problematic bytes could be anywhere in the object data.
</p></div>
773 <div class=
"paragraph"><p>The first thing I did was pull the broken data out of the packfile. I
774 needed to know how big the object was, which I found out with:
</p></div>
775 <div class=
"listingblock">
776 <div class=
"content">
777 <pre><code> $ git show-index
<$idx | cut -d' ' -f1 | sort -n | grep -A1
51653873
779 51664736</code></pre>
781 <div class=
"paragraph"><p>Show-index gives us the list of objects and their offsets. We throw away
782 everything but the offsets, and then sort them so that our interesting
783 offset (which we got from the fsck output above) is followed immediately
784 by the offset of the next object. Now we know that the object data is
785 10863 bytes long, and we can grab it with:
</p></div>
786 <div class=
"listingblock">
787 <div class=
"content">
788 <pre><code> dd if=$pack of=object bs=
1 skip=
51653873 count=
10863</code></pre>
790 <div class=
"paragraph"><p>I inspected a hexdump of the data, looking for any obvious bogosity
791 (e.g., a
4K run of zeroes would be a good sign of filesystem
792 corruption). But everything looked pretty reasonable.
</p></div>
793 <div class=
"paragraph"><p>Note that the
"object" file isn
’t fit for feeding straight to zlib; it
794 has the git packed object header, which is variable-length. We want to
795 strip that off so we can start playing with the zlib data directly. You
796 can either work your way through it manually (the format is described in
797 <a href=
"../gitformat-pack.html">gitformat-pack(
5)
</a>),
798 or you can walk through it in a debugger. I did the latter, creating a
799 valid pack like:
</p></div>
800 <div class=
"listingblock">
801 <div class=
"content">
802 <pre><code> # pack magic and version
803 printf 'PACK\
0\
0\
0\
2'
>tmp.pack
804 # pack has one object
805 printf '\
0\
0\
0\
1'
>>tmp.pack
806 # now add our object data
807 cat object
>>tmp.pack
808 # and then append the pack trailer
809 /path/to/git.git/t/helper/test-tool sha1 -b
<tmp.pack
>trailer
810 cat trailer
>>tmp.pack
</code></pre>
812 <div class=
"paragraph"><p>and then running
"git index-pack tmp.pack" in the debugger (stop at
813 unpack_raw_entry). Doing this, I found that there were
3 bytes of header
814 (and the header itself had a sane type and size). So I stripped those
816 <div class=
"listingblock">
817 <div class=
"content">
818 <pre><code> dd if=object of=zlib bs=
1 skip=
3</code></pre>
820 <div class=
"paragraph"><p>I ran the result through zlib
’s inflate using a custom C program. And
821 while it did report the error, I did get the right number of output
822 bytes (i.e., it matched git
’s size header that we decoded above). But
823 feeding the result back to
"git hash-object" didn
’t produce the same
824 sha1. So there were some wrong bytes, but I didn
’t know which. The file
825 happened to be C source code, so I hoped I could notice something
826 obviously wrong with it, but I didn
’t. I even got it to compile!
</p></div>
827 <div class=
"paragraph"><p>I also tried comparing it to other versions of the same path in the
828 repository, hoping that there would be some part of the diff that didn
’t
829 make sense. Unfortunately, this happened to be the only revision of this
830 particular file in the repository, so I had nothing to compare against.
</p></div>
831 <div class=
"paragraph"><p>So I took a different approach. Working under the guess that the
832 corruption was limited to a single byte, I wrote a program to munge each
833 byte individually, and try inflating the result. Since the object was
834 only
10K compressed, that worked out to about
2.5M attempts, which took
835 a few minutes.
</p></div>
836 <div class=
"paragraph"><p>The program I used is here:
</p></div>
837 <div class=
"listingblock">
838 <div class=
"content">
839 <pre><code>#include
<stdio.h
>
840 #include
<unistd.h
>
841 #include
<string.h
>
842 #include
<signal.h
>
843 #include
<zlib.h
>
845 static int try_zlib(unsigned char *buf, int len)
847 /* make this absurdly large so we don't have to loop */
848 static unsigned char out[
1024*
1024];
852 memset(
&z,
0, sizeof(z));
858 z.avail_out = sizeof(out);
860 ret = inflate(
&z,
0);
866 static int counter =
0;
867 static void progress(int sig)
869 fprintf(stderr,
"\r%d", counter);
875 /* oversized so we can read the whole buffer in */
876 unsigned char buf[
1024*
1024];
880 signal(SIGALRM, progress);
883 len = read(
0, buf, sizeof(buf));
884 for (i =
0; i
< len; i++) {
885 unsigned char c = buf[i];
886 for (j =
0; j
<=
0xff; j++) {
890 if (try_zlib(buf, len))
891 printf(
"i=%d, j=%x\n", i, j);
897 fprintf(stderr,
"\n");
901 <div class=
"paragraph"><p>I compiled and ran with:
</p></div>
902 <div class=
"listingblock">
903 <div class=
"content">
904 <pre><code> gcc -Wall -Werror -O3 munge.c -o munge -lz
905 ./munge
<zlib
</code></pre>
907 <div class=
"paragraph"><p>There were a few false positives early on (if you write
"no data" in the
908 zlib header, zlib thinks it
’s just fine :) ). But I got a hit about
909 halfway through:
</p></div>
910 <div class=
"listingblock">
911 <div class=
"content">
912 <pre><code> i=
5642, j=c7
</code></pre>
914 <div class=
"paragraph"><p>I let it run to completion, and got a few more hits at the end (where it
915 was munging the checksum to match our broken data). So there was a good
916 chance this middle hit was the source of the problem.
</p></div>
917 <div class=
"paragraph"><p>I confirmed by tweaking the byte in a hex editor, zlib inflating the
918 result (no errors!), and then piping the output into
"git hash-object",
919 which reported the sha1 of the broken object. Success!
</p></div>
920 <div class=
"paragraph"><p>I fixed the packfile itself with:
</p></div>
921 <div class=
"listingblock">
922 <div class=
"content">
923 <pre><code> chmod +w $pack
924 printf '\xc7' | dd of=$pack bs=
1 seek=
51659518 conv=notrunc
925 chmod -w $pack
</code></pre>
927 <div class=
"paragraph"><p>The
<code>\xc7
</code> comes from the replacement byte our
"munge" program found.
928 The offset
51659518 is derived by taking the original object offset
929 (
51653873), adding the replacement offset found by
"munge" (
5642), and
930 then adding back in the
3 bytes of git header we stripped.
</p></div>
931 <div class=
"paragraph"><p>After that,
"git fsck" ran clean.
</p></div>
932 <div class=
"paragraph"><p>As for the corruption itself, I was lucky that it was indeed a single
933 byte. In fact, it turned out to be a single bit. The byte
0xc7 was
934 corrupted to
0xc5. So presumably it was caused by faulty hardware, or a
935 cosmic ray.
</p></div>
936 <div class=
"paragraph"><p>And the aborted attempt to look at the inflated output to see what was
937 wrong? I could have looked forever and never found it. Here
’s the diff
938 between what the corrupted data inflates to, versus the real data:
</p></div>
939 <div class=
"listingblock">
940 <div class=
"content">
941 <pre><code> - cp = strtok (arg,
"+");
942 + cp = strtok (arg,
".");
</code></pre>
944 <div class=
"paragraph"><p>It tweaked one byte and still ended up as valid, readable C that just
945 happened to do something totally different! One takeaway is that on a
946 less unlucky day, looking at the zlib output might have actually been
947 helpful, as most random changes would actually break the C code.
</p></div>
948 <div class=
"paragraph"><p>But more importantly, git
’s hashing and checksumming noticed a problem
949 that easily could have gone undetected in another system. The result
950 still compiled, but would have caused an interesting bug (that would
951 have been blamed on some random commit).
</p></div>
955 <h2 id=
"_the_adventure_continues_8230">The adventure continues
…</h2>
956 <div class=
"sectionbody">
957 <div class=
"paragraph"><p>I ended up doing this again! Same entity, new hardware. The assumption
958 at this point is that the old disk corrupted the packfile, and then the
959 corruption was migrated to the new hardware (because it was done by
960 rsync or similar, and no fsck was done at the time of migration).
</p></div>
961 <div class=
"paragraph"><p>This time, the affected blob was over
20 megabytes, which was far too
962 large to do a brute-force on. I followed the instructions above to
963 create the
<code>zlib
</code> file. I then used the
<code>inflate
</code> program below to pull
964 the corrupted data from that. Examining that output gave me a hint about
965 where in the file the corruption was. But now I was working with the
966 file itself, not the zlib contents. So knowing the sha1 of the object
967 and the approximate area of the corruption, I used the
<code>sha1-munge
</code>
968 program below to brute-force the correct byte.
</p></div>
969 <div class=
"paragraph"><p>Here
’s the inflate program (it
’s essentially
<code>gunzip
</code> but without the
970 <code>.gz
</code> header processing):
</p></div>
971 <div class=
"listingblock">
972 <div class=
"content">
973 <pre><code>#include
<stdio.h
>
974 #include
<string.h
>
975 #include
<zlib.h
>
976 #include
<stdlib.h
>
978 int main(int argc, char **argv)
981 * oversized so we can read the whole buffer in;
982 * this could actually be switched to streaming
983 * to avoid any memory limitations
985 static unsigned char buf[
25 *
1024 *
1024];
986 static unsigned char out[
25 *
1024 *
1024];
991 len = read(
0, buf, sizeof(buf));
992 memset(
&z,
0, sizeof(z));
998 z.avail_out = sizeof(out);
1000 ret = inflate(
&z,
0);
1001 if (ret != Z_OK
&& ret != Z_STREAM_END)
1002 fprintf(stderr,
"initial inflate failed (%d)\n", ret);
1004 fprintf(stderr,
"outputting %lu bytes", z.total_out);
1005 fwrite(out,
1, z.total_out, stdout);
1009 <div class=
"paragraph"><p>And here is the
<code>sha1-munge
</code> program:
</p></div>
1010 <div class=
"listingblock">
1011 <div class=
"content">
1012 <pre><code>#include
<stdio.h
>
1013 #include
<unistd.h
>
1014 #include
<string.h
>
1015 #include
<signal.h
>
1016 #include
<openssl/sha.h
>
1017 #include
<stdlib.h
>
1020 static int counter =
0;
1021 static void progress(int sig)
1023 fprintf(stderr,
"\r%d", counter);
1027 static const signed char hexval_table[
256] = {
1028 -
1, -
1, -
1, -
1, -
1, -
1, -
1, -
1, /*
00-
07 */
1029 -
1, -
1, -
1, -
1, -
1, -
1, -
1, -
1, /*
08-
0f */
1030 -
1, -
1, -
1, -
1, -
1, -
1, -
1, -
1, /*
10-
17 */
1031 -
1, -
1, -
1, -
1, -
1, -
1, -
1, -
1, /*
18-
1f */
1032 -
1, -
1, -
1, -
1, -
1, -
1, -
1, -
1, /*
20-
27 */
1033 -
1, -
1, -
1, -
1, -
1, -
1, -
1, -
1, /*
28-
2f */
1034 0,
1,
2,
3,
4,
5,
6,
7, /*
30-
37 */
1035 8,
9, -
1, -
1, -
1, -
1, -
1, -
1, /*
38-
3f */
1036 -
1,
10,
11,
12,
13,
14,
15, -
1, /*
40-
47 */
1037 -
1, -
1, -
1, -
1, -
1, -
1, -
1, -
1, /*
48-
4f */
1038 -
1, -
1, -
1, -
1, -
1, -
1, -
1, -
1, /*
50-
57 */
1039 -
1, -
1, -
1, -
1, -
1, -
1, -
1, -
1, /*
58-
5f */
1040 -
1,
10,
11,
12,
13,
14,
15, -
1, /*
60-
67 */
1041 -
1, -
1, -
1, -
1, -
1, -
1, -
1, -
1, /*
68-
6f */
1042 -
1, -
1, -
1, -
1, -
1, -
1, -
1, -
1, /*
70-
77 */
1043 -
1, -
1, -
1, -
1, -
1, -
1, -
1, -
1, /*
78-
7f */
1044 -
1, -
1, -
1, -
1, -
1, -
1, -
1, -
1, /*
80-
87 */
1045 -
1, -
1, -
1, -
1, -
1, -
1, -
1, -
1, /*
88-
8f */
1046 -
1, -
1, -
1, -
1, -
1, -
1, -
1, -
1, /*
90-
97 */
1047 -
1, -
1, -
1, -
1, -
1, -
1, -
1, -
1, /*
98-
9f */
1048 -
1, -
1, -
1, -
1, -
1, -
1, -
1, -
1, /* a0-a7 */
1049 -
1, -
1, -
1, -
1, -
1, -
1, -
1, -
1, /* a8-af */
1050 -
1, -
1, -
1, -
1, -
1, -
1, -
1, -
1, /* b0-b7 */
1051 -
1, -
1, -
1, -
1, -
1, -
1, -
1, -
1, /* b8-bf */
1052 -
1, -
1, -
1, -
1, -
1, -
1, -
1, -
1, /* c0-c7 */
1053 -
1, -
1, -
1, -
1, -
1, -
1, -
1, -
1, /* c8-cf */
1054 -
1, -
1, -
1, -
1, -
1, -
1, -
1, -
1, /* d0-d7 */
1055 -
1, -
1, -
1, -
1, -
1, -
1, -
1, -
1, /* d8-df */
1056 -
1, -
1, -
1, -
1, -
1, -
1, -
1, -
1, /* e0-e7 */
1057 -
1, -
1, -
1, -
1, -
1, -
1, -
1, -
1, /* e8-ef */
1058 -
1, -
1, -
1, -
1, -
1, -
1, -
1, -
1, /* f0-f7 */
1059 -
1, -
1, -
1, -
1, -
1, -
1, -
1, -
1, /* f8-ff */
1062 static inline unsigned int hexval(unsigned char c)
1064 return hexval_table[c];
1067 static int get_sha1_hex(const char *hex, unsigned char *sha1)
1070 for (i =
0; i
< 20; i++) {
1073 * hex[
1]=='\
0' is caught when val is checked below,
1074 * but if hex[
0] is NUL we have to avoid reading
1075 * past the end of the string:
1079 val = (hexval(hex[
0])
<< 4) | hexval(hex[
1]);
1080 if (val
& ~
0xff)
1088 int main(int argc, char **argv)
1090 /* oversized so we can read the whole buffer in */
1091 static unsigned char buf[
25 *
1024 *
1024];
1094 unsigned char have[
20], want[
20];
1099 if (!argv[
1] || get_sha1_hex(argv[
1], want)) {
1100 fprintf(stderr,
"usage: sha1-munge <sha1> [start] <file.in\n");
1105 start = atoi(argv[
2]);
1109 len = read(
0, buf, sizeof(buf));
1110 header_len = sprintf(header,
"blob %d", len) +
1;
1111 fprintf(stderr,
"using header: %s\n", header);
1114 * We keep a running sha1 so that if you are munging
1115 * near the end of the file, we do not have to re-sha1
1116 * the unchanged earlier bytes
1118 SHA1_Init(
&orig);
1119 SHA1_Update(
&orig, header, header_len);
1121 SHA1_Update(
&orig, buf, start);
1123 signal(SIGALRM, progress);
1126 for (i = start; i
< len; i++) {
1132 * deletion -- this would not actually work in practice,
1133 * I think, because we've already committed to a
1134 * particular size in the header. Ditto for addition
1135 * below. In those cases, you'd have to do the whole
1136 * sha1 from scratch, or possibly keep three running
1137 *
"orig" sha1 computations going.
1139 memcpy(
&x,
&orig, sizeof(x));
1140 SHA1_Update(
&x, buf + i +
1, len - i -
1);
1141 SHA1_Final(have,
&x);
1142 if (!memcmp(have, want,
20))
1143 printf(
"i=%d, deletion\n", i);
1147 * replacement -- note that this tries each of the
256
1148 * possible bytes. If you suspect a single-bit flip,
1149 * it would be much shorter to just try the
8
1150 * bit-flipped variants.
1153 for (j =
0; j
<=
0xff; j++) {
1156 memcpy(
&x,
&orig, sizeof(x));
1157 SHA1_Update(
&x, buf + i, len - i);
1158 SHA1_Final(have,
&x);
1159 if (!memcmp(have, want,
20))
1160 printf(
"i=%d, j=%02x\n", i, j);
1166 for (j =
0; j
<=
0xff; j++) {
1167 unsigned char extra = j;
1168 memcpy(
&x,
&orig, sizeof(x));
1169 SHA1_Update(
&x,
&extra,
1);
1170 SHA1_Update(
&x, buf + i, len - i);
1171 SHA1_Final(have,
&x);
1172 if (!memcmp(have, want,
20))
1173 printf(
"i=%d, addition=%02x", i, j);
1177 SHA1_Update(
&orig, buf + i,
1);
1182 fprintf(stderr,
"\r%d\n", counter);
1189 <div id=
"footnotes"><hr /></div>
1191 <div id=
"footer-text">
1193 2023-
09-
22 17:
05:
17 PDT