1 /* vim:set ts=8 sw=8 sts=8 noet: */
3 bsdiff.c -- Binary patch generator.
5 Copyright 2003 Colin Percival
7 For the terms under which this work may be distributed, please see
8 the adjoining file "LICENSE".
11 2005-05-05 - Use the modified header struct from bspatch.h; use 32-bit
13 --Benjamin Smedberg <benjamin@smedbergs.us>
14 2005-05-18 - Use the same CRC algorithm as bzip2, and leverage the CRC table
16 --Darin Fisher <darin@meer.net>
31 #include <arpa/inet.h>
36 #define MIN(x,y) (((x)<(y)) ? (x) : (y))
38 //-----------------------------------------------------------------------------
40 // This variable lives in libbz2. It's declared in bzlib_private.h, so we just
41 // declare it here to avoid including that entire header file.
42 extern unsigned int BZ2_crc32Table
[256];
45 crc32(const unsigned char *buf
, unsigned int len
)
47 unsigned int crc
= 0xffffffffL
;
49 const unsigned char *end
= buf
+ len
;
50 for (; buf
!= end
; ++buf
)
51 crc
= (crc
<< 8) ^ BZ2_crc32Table
[(crc
>> 24) ^ *buf
];
57 //-----------------------------------------------------------------------------
60 reporterr(int e
, const char *fmt
, ...)
65 vfprintf(stderr
, fmt
, args
);
73 split(PROffset32
*I
,PROffset32
*V
,PROffset32 start
,PROffset32 len
,PROffset32 h
)
75 PROffset32 i
,j
,k
,x
,tmp
,jj
,kk
;
78 for(k
=start
;k
<start
+len
;k
+=j
) {
80 for(i
=1;k
+i
<start
+len
;i
++) {
86 tmp
=I
[k
+j
];I
[k
+j
]=I
[k
+i
];I
[k
+i
]=tmp
;
90 for(i
=0;i
<j
;i
++) V
[I
[k
+i
]]=k
+j
-1;
96 x
=V
[I
[start
+len
/2]+h
];
98 for(i
=start
;i
<start
+len
;i
++) {
100 if(V
[I
[i
]+h
]==x
) kk
++;
108 } else if(V
[I
[i
]+h
]==x
) {
109 tmp
=I
[i
];I
[i
]=I
[jj
+j
];I
[jj
+j
]=tmp
;
112 tmp
=I
[i
];I
[i
]=I
[kk
+k
];I
[kk
+k
]=tmp
;
118 if(V
[I
[jj
+j
]+h
]==x
) {
121 tmp
=I
[jj
+j
];I
[jj
+j
]=I
[kk
+k
];I
[kk
+k
]=tmp
;
126 if(jj
>start
) split(I
,V
,start
,jj
-start
,h
);
128 for(i
=0;i
<kk
-jj
;i
++) V
[I
[jj
+i
]]=kk
-1;
129 if(jj
==kk
-1) I
[jj
]=-1;
131 if(start
+len
>kk
) split(I
,V
,kk
,start
+len
-kk
,h
);
135 qsufsort(PROffset32
*I
,PROffset32
*V
,unsigned char *old
,PROffset32 oldsize
)
137 PROffset32 buckets
[256];
140 for(i
=0;i
<256;i
++) buckets
[i
]=0;
141 for(i
=0;i
<oldsize
;i
++) buckets
[old
[i
]]++;
142 for(i
=1;i
<256;i
++) buckets
[i
]+=buckets
[i
-1];
143 for(i
=255;i
>0;i
--) buckets
[i
]=buckets
[i
-1];
146 for(i
=0;i
<oldsize
;i
++) I
[++buckets
[old
[i
]]]=i
;
148 for(i
=0;i
<oldsize
;i
++) V
[i
]=buckets
[old
[i
]];
150 for(i
=1;i
<256;i
++) if(buckets
[i
]==buckets
[i
-1]+1) I
[buckets
[i
]]=-1;
153 for(h
=1;I
[0]!=-(oldsize
+1);h
+=h
) {
155 for(i
=0;i
<oldsize
+1;) {
160 if(len
) I
[i
-len
]=-len
;
167 if(len
) I
[i
-len
]=-len
;
170 for(i
=0;i
<oldsize
+1;i
++) I
[V
[i
]]=i
;
174 matchlen(unsigned char *old
,PROffset32 oldsize
,unsigned char *newbuf
,PROffset32 newsize
)
178 for(i
=0;(i
<oldsize
)&&(i
<newsize
);i
++)
179 if(old
[i
]!=newbuf
[i
]) break;
185 search(PROffset32
*I
,unsigned char *old
,PROffset32 oldsize
,
186 unsigned char *newbuf
,PROffset32 newsize
,PROffset32 st
,PROffset32 en
,PROffset32
*pos
)
191 x
=matchlen(old
+I
[st
],oldsize
-I
[st
],newbuf
,newsize
);
192 y
=matchlen(old
+I
[en
],oldsize
-I
[en
],newbuf
,newsize
);
204 if(memcmp(old
+I
[x
],newbuf
,MIN(oldsize
-I
[x
],newsize
))<0) {
205 return search(I
,old
,oldsize
,newbuf
,newsize
,x
,en
,pos
);
207 return search(I
,old
,oldsize
,newbuf
,newsize
,st
,x
,pos
);
211 int main(int argc
,char *argv
[])
214 unsigned char *old
,*newbuf
;
215 PROffset32 oldsize
,newsize
;
218 PROffset32 scan
,pos
,len
;
219 PROffset32 lastscan
,lastpos
,lastoffset
;
220 PROffset32 oldscore
,scsc
;
222 PROffset32 s
,Sf
,lenf
,Sb
,lenb
;
223 PROffset32 overlap
,Ss
,lens
;
226 PROffset32 dblen
,eblen
;
227 unsigned char *db
,*eb
;
231 MBSPatchHeader header
= {
232 {'M','B','D','I','F','F','1','0'},
239 reporterr(1,"usage: %s <oldfile> <newfile> <patchfile>\n",argv
[0]);
241 /* Allocate oldsize+1 bytes instead of oldsize bytes to ensure
242 that we never try to malloc(0) and get a NULL pointer */
243 if(((fd
=open(argv
[1],O_RDONLY
|_O_BINARY
,0))<0) ||
244 ((oldsize
=lseek(fd
,0,SEEK_END
))==-1) ||
245 ((old
=(unsigned char*) malloc(oldsize
+1))==NULL
) ||
246 (lseek(fd
,0,SEEK_SET
)!=0) ||
247 (read(fd
,old
,oldsize
)!=oldsize
) ||
249 reporterr(1,"%s\n",argv
[1]);
251 scrc
= crc32(old
, oldsize
);
253 if(((I
=(PROffset32
*) malloc((oldsize
+1)*sizeof(PROffset32
)))==NULL
) ||
254 ((V
=(PROffset32
*) malloc((oldsize
+1)*sizeof(PROffset32
)))==NULL
))
257 qsufsort(I
,V
,old
,oldsize
);
261 /* Allocate newsize+1 bytes instead of newsize bytes to ensure
262 that we never try to malloc(0) and get a NULL pointer */
263 if(((fd
=open(argv
[2],O_RDONLY
|_O_BINARY
,0))<0) ||
264 ((newsize
=lseek(fd
,0,SEEK_END
))==-1) ||
265 ((newbuf
=(unsigned char*) malloc(newsize
+1))==NULL
) ||
266 (lseek(fd
,0,SEEK_SET
)!=0) ||
267 (read(fd
,newbuf
,newsize
)!=newsize
) ||
268 (close(fd
)==-1)) reporterr(1,"%s\n",argv
[2]);
270 if(((db
=(unsigned char*) malloc(newsize
+1))==NULL
) ||
271 ((eb
=(unsigned char*) malloc(newsize
+1))==NULL
))
277 if((fd
=open(argv
[3],O_CREAT
|O_TRUNC
|O_WRONLY
|_O_BINARY
,0666))<0)
278 reporterr(1,"%s\n",argv
[3]);
280 /* start writing here */
282 /* We don't know the lengths yet, so we will write the header again
285 if(write(fd
,&header
,sizeof(MBSPatchHeader
))!=sizeof(MBSPatchHeader
))
286 reporterr(1,"%s\n",argv
[3]);
289 lastscan
=0;lastpos
=0;lastoffset
=0;
291 while(scan
<newsize
) {
294 for(scsc
=scan
+=len
;scan
<newsize
;scan
++) {
295 len
=search(I
,old
,oldsize
,newbuf
+scan
,newsize
-scan
,
298 for(;scsc
<scan
+len
;scsc
++)
299 if((scsc
+lastoffset
<oldsize
) &&
300 (old
[scsc
+lastoffset
] == newbuf
[scsc
]))
303 if(((len
==oldscore
) && (len
!=0)) ||
304 (len
>oldscore
+8)) break;
306 if((scan
+lastoffset
<oldsize
) &&
307 (old
[scan
+lastoffset
] == newbuf
[scan
]))
311 if((len
!=oldscore
) || (scan
==newsize
)) {
312 MBSPatchTriple triple
;
315 for(i
=0;(lastscan
+i
<scan
)&&(lastpos
+i
<oldsize
);) {
316 if(old
[lastpos
+i
]==newbuf
[lastscan
+i
]) s
++;
318 if(s
*2-i
>Sf
*2-lenf
) { Sf
=s
; lenf
=i
; };
324 for(i
=1;(scan
>=lastscan
+i
)&&(pos
>=i
);i
++) {
325 if(old
[pos
-i
]==newbuf
[scan
-i
]) s
++;
326 if(s
*2-i
>Sb
*2-lenb
) { Sb
=s
; lenb
=i
; };
330 if(lastscan
+lenf
>scan
-lenb
) {
331 overlap
=(lastscan
+lenf
)-(scan
-lenb
);
333 for(i
=0;i
<overlap
;i
++) {
334 if(newbuf
[lastscan
+lenf
-overlap
+i
]==
335 old
[lastpos
+lenf
-overlap
+i
]) s
++;
336 if(newbuf
[scan
-lenb
+i
]==
337 old
[pos
-lenb
+i
]) s
--;
338 if(s
>Ss
) { Ss
=s
; lens
=i
+1; };
346 db
[dblen
+i
]=newbuf
[lastscan
+i
]-old
[lastpos
+i
];
347 for(i
=0;i
<(scan
-lenb
)-(lastscan
+lenf
);i
++)
348 eb
[eblen
+i
]=newbuf
[lastscan
+lenf
+i
];
351 eblen
+=(scan
-lenb
)-(lastscan
+lenf
);
353 triple
.x
= htonl(lenf
);
354 triple
.y
= htonl((scan
-lenb
)-(lastscan
+lenf
));
355 triple
.z
= htonl((pos
-lenb
)-(lastpos
+lenf
));
356 if (write(fd
,&triple
,sizeof(triple
)) != sizeof(triple
))
359 #ifdef DEBUG_bsmedberg
360 printf("Writing a block:\n"
365 (PRUint32
) ((scan
-lenb
)-(lastscan
+lenf
)),
366 (PRUint32
) ((pos
-lenb
)-(lastpos
+lenf
)));
377 if(write(fd
,db
,dblen
)!=dblen
)
380 if(write(fd
,eb
,eblen
)!=eblen
)
383 header
.slen
= htonl(oldsize
);
384 header
.scrc32
= htonl(scrc
);
385 header
.dlen
= htonl(newsize
);
386 header
.cblen
= htonl(numtriples
* sizeof(MBSPatchTriple
));
387 header
.difflen
= htonl(dblen
);
388 header
.extralen
= htonl(eblen
);
390 if (lseek(fd
,0,SEEK_SET
) == -1 ||
391 write(fd
,&header
,sizeof(header
)) != sizeof(header
) ||