2 * Copyright 2003-2005 Colin Percival
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted providing that the following conditions
8 * 1. Redistributions of source code must retain the above copyright
9 * notice, this list of conditions and the following disclaimer.
10 * 2. Redistributions in binary form must reproduce the above copyright
11 * notice, this list of conditions and the following disclaimer in the
12 * documentation and/or other materials provided with the distribution.
14 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
15 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
16 * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
17 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY
18 * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
19 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
20 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
21 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
22 * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING
23 * IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
24 * POSSIBILITY OF SUCH DAMAGE.
28 __FBSDID("$FreeBSD: src/usr.bin/bsdiff/bsdiff/bsdiff.c,v 1.1 2005/08/06 01:59:05 cperciva Exp $");
31 #include <sys/types.h>
43 #if defined(__APPLE__)
44 #include <libkern/OSByteOrder.h>
45 #define htole64(x) OSSwapHostToLittleInt64(x)
46 #elif defined(__linux__)
48 #elif defined(_WIN32) && (defined(_M_IX86) || defined(_M_X64))
49 #define htole64(x) (x)
51 #error Provide htole64 for this platform
54 #include "chrome/installer/mac/third_party/bsdiff/sha1_adapter.h"
56 #define MIN(x,y) (((x)<(y)) ? (x) : (y))
58 static void split(off_t
*I
,off_t
*V
,off_t start
,off_t len
,off_t h
)
60 off_t i
,j
,k
,x
,tmp
,jj
,kk
;
63 for(k
=start
;k
<start
+len
;k
+=j
) {
65 for(i
=1;k
+i
<start
+len
;i
++) {
71 tmp
=I
[k
+j
];I
[k
+j
]=I
[k
+i
];I
[k
+i
]=tmp
;
75 for(i
=0;i
<j
;i
++) V
[I
[k
+i
]]=k
+j
-1;
81 x
=V
[I
[start
+len
/2]+h
];
83 for(i
=start
;i
<start
+len
;i
++) {
85 if(V
[I
[i
]+h
]==x
) kk
++;
93 } else if(V
[I
[i
]+h
]==x
) {
94 tmp
=I
[i
];I
[i
]=I
[jj
+j
];I
[jj
+j
]=tmp
;
97 tmp
=I
[i
];I
[i
]=I
[kk
+k
];I
[kk
+k
]=tmp
;
103 if(V
[I
[jj
+j
]+h
]==x
) {
106 tmp
=I
[jj
+j
];I
[jj
+j
]=I
[kk
+k
];I
[kk
+k
]=tmp
;
111 if(jj
>start
) split(I
,V
,start
,jj
-start
,h
);
113 for(i
=0;i
<kk
-jj
;i
++) V
[I
[jj
+i
]]=kk
-1;
114 if(jj
==kk
-1) I
[jj
]=-1;
116 if(start
+len
>kk
) split(I
,V
,kk
,start
+len
-kk
,h
);
119 static void qsufsort(off_t
*I
,off_t
*V
,u_char
*old
,off_t oldsize
)
124 for(i
=0;i
<256;i
++) buckets
[i
]=0;
125 for(i
=0;i
<oldsize
;i
++) buckets
[old
[i
]]++;
126 for(i
=1;i
<256;i
++) buckets
[i
]+=buckets
[i
-1];
127 for(i
=255;i
>0;i
--) buckets
[i
]=buckets
[i
-1];
130 for(i
=0;i
<oldsize
;i
++) I
[++buckets
[old
[i
]]]=i
;
132 for(i
=0;i
<oldsize
;i
++) V
[i
]=buckets
[old
[i
]];
134 for(i
=1;i
<256;i
++) if(buckets
[i
]==buckets
[i
-1]+1) I
[buckets
[i
]]=-1;
137 for(h
=1;I
[0]!=-(oldsize
+1);h
+=h
) {
139 for(i
=0;i
<oldsize
+1;) {
144 if(len
) I
[i
-len
]=-len
;
151 if(len
) I
[i
-len
]=-len
;
154 for(i
=0;i
<oldsize
+1;i
++) I
[V
[i
]]=i
;
157 static off_t
matchlen(u_char
*old
,off_t oldsize
,u_char
*new,off_t newsize
)
161 for(i
=0;(i
<oldsize
)&&(i
<newsize
);i
++)
162 if(old
[i
]!=new[i
]) break;
167 static off_t
search(off_t
*I
,u_char
*old
,off_t oldsize
,
168 u_char
*new,off_t newsize
,off_t st
,off_t en
,off_t
*pos
)
173 x
=matchlen(old
+I
[st
],oldsize
-I
[st
],new,newsize
);
174 y
=matchlen(old
+I
[en
],oldsize
-I
[en
],new,newsize
);
186 if(memcmp(old
+I
[x
],new,MIN(oldsize
-I
[x
],newsize
))<0) {
187 return search(I
,old
,oldsize
,new,newsize
,x
,en
,pos
);
189 return search(I
,old
,oldsize
,new,newsize
,st
,x
,pos
);
193 static inline void offtout(off_t x
,u_char
*buf
)
195 *((off_t
*)buf
) = htole64(x
);
198 /* zlib provides compress2, which deflates to deflate (zlib) format. This is
199 * unfortunately distinct from gzip format in that the headers wrapping the
200 * decompressed data are different. gbspatch reads gzip-compressed data using
201 * the file-oriented gzread interface, which only supports gzip format.
202 * compress2gzip is identical to zlib's compress2 except that it produces gzip
203 * output compatible with gzread. This change is achieved by calling
204 * deflateInit2 instead of deflateInit and specifying 31 for windowBits;
205 * numbers greater than 15 cause the addition of a gzip wrapper. */
206 static int compress2gzip(Bytef
*dest
, uLongf
*destLen
,
207 const Bytef
*source
, uLong sourceLen
, int level
)
212 stream
.next_in
= (Bytef
*)source
;
213 stream
.avail_in
= (uInt
)sourceLen
;
215 stream
.next_out
= dest
;
216 stream
.avail_out
= (uInt
)*destLen
;
217 if ((uLong
)stream
.avail_out
!= *destLen
) return Z_BUF_ERROR
;
219 stream
.zalloc
= (alloc_func
)0;
220 stream
.zfree
= (free_func
)0;
221 stream
.opaque
= (voidpf
)0;
223 err
= deflateInit2(&stream
,
224 level
, Z_DEFLATED
, 31, 8, Z_DEFAULT_STRATEGY
);
225 if (err
!= Z_OK
) return err
;
227 err
= deflate(&stream
, Z_FINISH
);
228 if (err
!= Z_STREAM_END
) {
230 return err
== Z_OK
? Z_BUF_ERROR
: err
;
232 *destLen
= stream
.total_out
;
234 err
= deflateEnd(&stream
);
238 /* Recompress buf of size buf_len using bzip2 or gzip. The smallest version is
239 * used. The original uncompressed variant may be the smallest. Returns a
240 * number identifying the encoding, 1 for uncompressed, 2 for bzip2, 3 for
241 * gzip, and 4 for xz/lzma2. If the original uncompressed variant is not
242 * smallest, it is freed. The caller must free any buf after this function
244 static char make_small(u_char
**buf
, off_t
*buf_len
)
246 u_char
*source
= *buf
;
247 off_t source_len
= *buf_len
;
248 u_char
*bz2
, *gz
, *lzma
;
249 unsigned int bz2_len
;
250 size_t gz_len
, lzma_len
, lzma_pos
;
258 bz2_len
= source_len
+ 1;
259 bz2
= malloc(bz2_len
);
260 bz2_err
= BZ2_bzBuffToBuffCompress((char*)bz2
, &bz2_len
, (char*)source
,
261 source_len
, 9, 0, 0);
262 if (bz2_err
== BZ_OK
) {
263 if (bz2_len
< *buf_len
) {
271 } else if (bz2_err
== BZ_OUTBUFF_FULL
) {
275 errx(1, "BZ2_bzBuffToBuffCompress: %d", bz2_err
);
278 gz_len
= source_len
+ 1;
280 gz_err
= compress2gzip(gz
, &gz_len
, source
, source_len
, 9);
281 if (gz_err
== Z_OK
) {
282 if (gz_len
< *buf_len
) {
290 } else if (gz_err
== Z_BUF_ERROR
) {
294 errx(1, "compress2gzip: %d", gz_err
);
297 lzma_len
= source_len
+ 1;
298 lzma
= malloc(lzma_len
);
301 /* Equivalent to the options used by xz -9 -e. */
302 lzma_ck
= LZMA_CHECK_CRC64
;
303 if (!lzma_check_is_supported(lzma_ck
))
304 lzma_ck
= LZMA_CHECK_CRC32
;
305 lzma_err
= lzma_easy_buffer_encode(9 | LZMA_PRESET_EXTREME
,
308 lzma
, &lzma_pos
, lzma_len
);
309 if (lzma_err
== LZMA_OK
) {
310 if (lzma_pos
< *buf_len
) {
318 } else if (lzma_err
== LZMA_BUF_ERROR
) {
322 errx(1, "lzma_easy_buffer_encode: %d", lzma_err
);
332 int main(int argc
,char *argv
[])
336 off_t oldsize
,newsize
;
339 off_t lastscan
,lastpos
,lastoffset
;
341 off_t s
,Sf
,lenf
,Sb
,lenb
;
342 off_t overlap
,Ss
,lens
;
344 off_t cblen
, dblen
, eblen
;
345 u_char
*cb
, *db
, *eb
;
349 if(argc
!=4) errx(1,"usage: %s oldfile newfile patchfile",argv
[0]);
351 /* Allocate oldsize+1 bytes instead of oldsize bytes to ensure
352 that we never try to malloc(0) and get a NULL pointer */
353 if(((fd
=open(argv
[1],O_RDONLY
,0))<0) ||
354 ((oldsize
=lseek(fd
,0,SEEK_END
))==-1) ||
355 ((old
=malloc(oldsize
+1))==NULL
) ||
356 (lseek(fd
,0,SEEK_SET
)!=0) ||
357 (read(fd
,old
,oldsize
)!=oldsize
) ||
358 (close(fd
)==-1)) err(1,"%s",argv
[1]);
360 if(((I
=malloc((oldsize
+1)*sizeof(off_t
)))==NULL
) ||
361 ((V
=malloc((oldsize
+1)*sizeof(off_t
)))==NULL
)) err(1,NULL
);
363 qsufsort(I
,V
,old
,oldsize
);
367 /* Allocate newsize+1 bytes instead of newsize bytes to ensure
368 that we never try to malloc(0) and get a NULL pointer */
369 if(((fd
=open(argv
[2],O_RDONLY
,0))<0) ||
370 ((newsize
=lseek(fd
,0,SEEK_END
))==-1) ||
371 ((new=malloc(newsize
+1))==NULL
) ||
372 (lseek(fd
,0,SEEK_SET
)!=0) ||
373 (read(fd
,new,newsize
)!=newsize
) ||
374 (close(fd
)==-1)) err(1,"%s",argv
[2]);
376 if(((cb
=malloc(newsize
+1))==NULL
) ||
377 ((db
=malloc(newsize
+1))==NULL
) ||
378 ((eb
=malloc(newsize
+1))==NULL
)) err(1,NULL
);
383 /* Create the patch file */
384 if ((pf
= fopen(argv
[3], "wb")) == NULL
)
385 err(1, "%s", argv
[3]);
389 8 8 length of compressed control block (x)
390 16 8 length of compressed diff block (y)
391 24 8 length of compressed extra block (z)
392 32 8 length of old file
393 40 8 length of new file
394 48 20 SHA1 of old file
395 68 20 SHA1 of new file
396 88 1 encoding of control block
397 89 1 encoding of diff block
398 90 1 encoding of extra block
400 96 x compressed control block
401 96+x y compressed diff block
402 96+x+y z compressed extra block
403 Encodings are 1 (uncompressed), 2 (bzip2), 3 (gzip), and 4 (xz/lzma2).
406 memset(header
, 0, sizeof(header
));
407 if (fwrite(header
, sizeof(header
), 1, pf
) != 1)
408 err(1, "fwrite(%s)", argv
[3]);
409 memcpy(header
, "BSDIFF4G", 8);
410 offtout(oldsize
, header
+ 32);
411 offtout(newsize
, header
+ 40);
412 SHA1(old
, oldsize
, header
+ 48);
413 SHA1(new, newsize
, header
+ 68);
415 /* Compute the differences */
417 lastscan
=0;lastpos
=0;lastoffset
=0;
418 while(scan
<newsize
) {
421 for(scsc
=scan
+=len
;scan
<newsize
;scan
++) {
422 len
=search(I
,old
,oldsize
,new+scan
,newsize
-scan
,
425 for(;scsc
<scan
+len
;scsc
++)
426 if((scsc
+lastoffset
<oldsize
) &&
427 (old
[scsc
+lastoffset
] == new[scsc
]))
430 if(((len
==oldscore
) && (len
!=0)) ||
431 (len
>oldscore
+8)) break;
433 if((scan
+lastoffset
<oldsize
) &&
434 (old
[scan
+lastoffset
] == new[scan
]))
438 if((len
!=oldscore
) || (scan
==newsize
)) {
440 for(i
=0;(lastscan
+i
<scan
)&&(lastpos
+i
<oldsize
);) {
441 if(old
[lastpos
+i
]==new[lastscan
+i
]) s
++;
443 if(s
*2-i
>Sf
*2-lenf
) { Sf
=s
; lenf
=i
; };
449 for(i
=1;(scan
>=lastscan
+i
)&&(pos
>=i
);i
++) {
450 if(old
[pos
-i
]==new[scan
-i
]) s
++;
451 if(s
*2-i
>Sb
*2-lenb
) { Sb
=s
; lenb
=i
; };
455 if(lastscan
+lenf
>scan
-lenb
) {
456 overlap
=(lastscan
+lenf
)-(scan
-lenb
);
458 for(i
=0;i
<overlap
;i
++) {
459 if(new[lastscan
+lenf
-overlap
+i
]==
460 old
[lastpos
+lenf
-overlap
+i
]) s
++;
461 if(new[scan
-lenb
+i
]==
462 old
[pos
-lenb
+i
]) s
--;
463 if(s
>Ss
) { Ss
=s
; lens
=i
+1; };
471 db
[dblen
+i
]=new[lastscan
+i
]-old
[lastpos
+i
];
472 for(i
=0;i
<(scan
-lenb
)-(lastscan
+lenf
);i
++)
473 eb
[eblen
+i
]=new[lastscan
+lenf
+i
];
476 eblen
+=(scan
-lenb
)-(lastscan
+lenf
);
478 offtout(lenf
, cb
+ cblen
);
481 offtout((scan
- lenb
) - (lastscan
+ lenf
), cb
+ cblen
);
484 offtout((pos
- lenb
) - (lastpos
+ lenf
), cb
+ cblen
);
493 header
[88] = make_small(&cb
, &cblen
);
494 header
[89] = make_small(&db
, &dblen
);
495 header
[90] = make_small(&eb
, &eblen
);
497 if (fwrite(cb
, 1, cblen
, pf
) != cblen
)
499 if (fwrite(db
, 1, dblen
, pf
) != dblen
)
501 if (fwrite(eb
, 1, eblen
, pf
) != eblen
)
504 offtout(cblen
, header
+ 8);
505 offtout(dblen
, header
+ 16);
506 offtout(eblen
, header
+ 24);
508 /* Seek to the beginning, write the header, and close the file */
509 if (fseeko(pf
, 0, SEEK_SET
))
511 if (fwrite(header
, sizeof(header
), 1, pf
) != 1)
512 err(1, "fwrite(%s)", argv
[3]);
516 /* Free the memory we used */