1 /* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
3 bsdiff.c -- Binary patch generator.
5 Copyright 2003 Colin Percival
7 For the terms under which this work may be distributed, please see
8 the adjoining file "LICENSE".
11 2005-05-05 - Use the modified header struct from bspatch.h; use 32-bit
13 --Benjamin Smedberg <benjamin@smedbergs.us>
14 2005-05-18 - Use the same CRC algorithm as bzip2, and leverage the CRC table
16 --Darin Fisher <darin@meer.net>
31 #include <arpa/inet.h>
36 #define MIN(x,y) (((x)<(y)) ? (x) : (y))
38 /*---------------------------------------------------------------------------*/
40 /* This variable lives in libbz2. It's declared in bzlib_private.h, so we just
41 * declare it here to avoid including that entire header file.
43 extern "C" unsigned int BZ2_crc32Table
[256];
46 crc32(const unsigned char *buf
, unsigned int len
)
48 unsigned int crc
= 0xffffffffL
;
50 const unsigned char *end
= buf
+ len
;
51 for (; buf
!= end
; ++buf
)
52 crc
= (crc
<< 8) ^ BZ2_crc32Table
[(crc
>> 24) ^ *buf
];
58 /*---------------------------------------------------------------------------*/
61 reporterr(int e
, const char *fmt
, ...)
66 vfprintf(stderr
, fmt
, args
);
74 split(int32_t *I
,int32_t *V
,int32_t start
,int32_t len
,int32_t h
)
76 int32_t i
,j
,k
,x
,tmp
,jj
,kk
;
79 for(k
=start
;k
<start
+len
;k
+=j
) {
81 for(i
=1;k
+i
<start
+len
;i
++) {
87 tmp
=I
[k
+j
];I
[k
+j
]=I
[k
+i
];I
[k
+i
]=tmp
;
91 for(i
=0;i
<j
;i
++) V
[I
[k
+i
]]=k
+j
-1;
97 x
=V
[I
[start
+len
/2]+h
];
99 for(i
=start
;i
<start
+len
;i
++) {
100 if(V
[I
[i
]+h
]<x
) jj
++;
101 if(V
[I
[i
]+h
]==x
) kk
++;
109 } else if(V
[I
[i
]+h
]==x
) {
110 tmp
=I
[i
];I
[i
]=I
[jj
+j
];I
[jj
+j
]=tmp
;
113 tmp
=I
[i
];I
[i
]=I
[kk
+k
];I
[kk
+k
]=tmp
;
119 if(V
[I
[jj
+j
]+h
]==x
) {
122 tmp
=I
[jj
+j
];I
[jj
+j
]=I
[kk
+k
];I
[kk
+k
]=tmp
;
127 if(jj
>start
) split(I
,V
,start
,jj
-start
,h
);
129 for(i
=0;i
<kk
-jj
;i
++) V
[I
[jj
+i
]]=kk
-1;
130 if(jj
==kk
-1) I
[jj
]=-1;
132 if(start
+len
>kk
) split(I
,V
,kk
,start
+len
-kk
,h
);
136 qsufsort(int32_t *I
,int32_t *V
,unsigned char *old
,int32_t oldsize
)
138 int32_t buckets
[256];
141 for(i
=0;i
<256;i
++) buckets
[i
]=0;
142 for(i
=0;i
<oldsize
;i
++) buckets
[old
[i
]]++;
143 for(i
=1;i
<256;i
++) buckets
[i
]+=buckets
[i
-1];
144 for(i
=255;i
>0;i
--) buckets
[i
]=buckets
[i
-1];
147 for(i
=0;i
<oldsize
;i
++) I
[++buckets
[old
[i
]]]=i
;
149 for(i
=0;i
<oldsize
;i
++) V
[i
]=buckets
[old
[i
]];
151 for(i
=1;i
<256;i
++) if(buckets
[i
]==buckets
[i
-1]+1) I
[buckets
[i
]]=-1;
154 for(h
=1;I
[0]!=-(oldsize
+1);h
+=h
) {
156 for(i
=0;i
<oldsize
+1;) {
161 if(len
) I
[i
-len
]=-len
;
168 if(len
) I
[i
-len
]=-len
;
171 for(i
=0;i
<oldsize
+1;i
++) I
[V
[i
]]=i
;
175 matchlen(unsigned char *old
,int32_t oldsize
,unsigned char *newbuf
,int32_t newsize
)
179 for(i
=0;(i
<oldsize
)&&(i
<newsize
);i
++)
180 if(old
[i
]!=newbuf
[i
]) break;
186 search(int32_t *I
,unsigned char *old
,int32_t oldsize
,
187 unsigned char *newbuf
,int32_t newsize
,int32_t st
,int32_t en
,int32_t *pos
)
192 x
=matchlen(old
+I
[st
],oldsize
-I
[st
],newbuf
,newsize
);
193 y
=matchlen(old
+I
[en
],oldsize
-I
[en
],newbuf
,newsize
);
205 if(memcmp(old
+I
[x
],newbuf
,MIN(oldsize
-I
[x
],newsize
))<0) {
206 return search(I
,old
,oldsize
,newbuf
,newsize
,x
,en
,pos
);
208 return search(I
,old
,oldsize
,newbuf
,newsize
,st
,x
,pos
);
212 int main(int argc
,char *argv
[])
215 unsigned char *old
= nullptr,*newbuf
= nullptr;
216 int32_t oldsize
= 0, newsize
= 0;
217 int32_t *I
= nullptr,*V
= nullptr;
219 int32_t scan
,pos
= 0,len
;
220 int32_t lastscan
,lastpos
,lastoffset
;
221 int32_t oldscore
,scsc
;
223 int32_t s
,Sf
,lenf
,Sb
,lenb
;
224 int32_t overlap
,Ss
,lens
;
228 unsigned char *db
,*eb
= nullptr;
232 MBSPatchHeader header
= {
233 {'M','B','D','I','F','F','1','0'},
240 reporterr(1,"usage: %s <oldfile> <newfile> <patchfile>\n",argv
[0]);
242 /* Allocate oldsize+1 bytes instead of oldsize bytes to ensure
243 that we never try to malloc(0) and get a NULL pointer */
244 if(((fd
=open(argv
[1],O_RDONLY
|_O_BINARY
,0))<0) ||
245 ((oldsize
=lseek(fd
,0,SEEK_END
))==-1) ||
246 ((old
=(unsigned char*) malloc(oldsize
+1))==NULL
) ||
247 (lseek(fd
,0,SEEK_SET
)!=0) ||
248 (read(fd
,old
,oldsize
)!=oldsize
) ||
250 reporterr(1,"%s\n",argv
[1]);
252 scrc
= crc32(old
, oldsize
);
254 if(((I
=(int32_t*) malloc((oldsize
+1)*sizeof(int32_t)))==NULL
) ||
255 ((V
=(int32_t*) malloc((oldsize
+1)*sizeof(int32_t)))==NULL
))
258 qsufsort(I
,V
,old
,oldsize
);
262 /* Allocate newsize+1 bytes instead of newsize bytes to ensure
263 that we never try to malloc(0) and get a NULL pointer */
264 if(((fd
=open(argv
[2],O_RDONLY
|_O_BINARY
,0))<0) ||
265 ((newsize
=lseek(fd
,0,SEEK_END
))==-1) ||
266 ((newbuf
=(unsigned char*) malloc(newsize
+1))==NULL
) ||
267 (lseek(fd
,0,SEEK_SET
)!=0) ||
268 (read(fd
,newbuf
,newsize
)!=newsize
) ||
269 (close(fd
)==-1)) reporterr(1,"%s\n",argv
[2]);
271 if(((db
=(unsigned char*) malloc(newsize
+1))==NULL
) ||
272 ((eb
=(unsigned char*) malloc(newsize
+1))==NULL
))
278 if((fd
=open(argv
[3],O_CREAT
|O_TRUNC
|O_WRONLY
|_O_BINARY
,0666))<0)
279 reporterr(1,"%s\n",argv
[3]);
281 /* start writing here */
283 /* We don't know the lengths yet, so we will write the header again
286 if(write(fd
,&header
,sizeof(MBSPatchHeader
))!=sizeof(MBSPatchHeader
))
287 reporterr(1,"%s\n",argv
[3]);
290 lastscan
=0;lastpos
=0;lastoffset
=0;
292 while(scan
<newsize
) {
295 for(scsc
=scan
+=len
;scan
<newsize
;scan
++) {
296 len
=search(I
,old
,oldsize
,newbuf
+scan
,newsize
-scan
,
299 for(;scsc
<scan
+len
;scsc
++)
300 if((scsc
+lastoffset
<oldsize
) &&
301 (old
[scsc
+lastoffset
] == newbuf
[scsc
]))
304 if(((len
==oldscore
) && (len
!=0)) ||
305 (len
>oldscore
+8)) break;
307 if((scan
+lastoffset
<oldsize
) &&
308 (old
[scan
+lastoffset
] == newbuf
[scan
]))
312 if((len
!=oldscore
) || (scan
==newsize
)) {
313 MBSPatchTriple triple
;
316 for(i
=0;(lastscan
+i
<scan
)&&(lastpos
+i
<oldsize
);) {
317 if(old
[lastpos
+i
]==newbuf
[lastscan
+i
]) s
++;
319 if(s
*2-i
>Sf
*2-lenf
) { Sf
=s
; lenf
=i
; };
325 for(i
=1;(scan
>=lastscan
+i
)&&(pos
>=i
);i
++) {
326 if(old
[pos
-i
]==newbuf
[scan
-i
]) s
++;
327 if(s
*2-i
>Sb
*2-lenb
) { Sb
=s
; lenb
=i
; };
331 if(lastscan
+lenf
>scan
-lenb
) {
332 overlap
=(lastscan
+lenf
)-(scan
-lenb
);
334 for(i
=0;i
<overlap
;i
++) {
335 if(newbuf
[lastscan
+lenf
-overlap
+i
]==
336 old
[lastpos
+lenf
-overlap
+i
]) s
++;
337 if(newbuf
[scan
-lenb
+i
]==
338 old
[pos
-lenb
+i
]) s
--;
339 if(s
>Ss
) { Ss
=s
; lens
=i
+1; };
347 db
[dblen
+i
]=newbuf
[lastscan
+i
]-old
[lastpos
+i
];
348 for(i
=0;i
<(scan
-lenb
)-(lastscan
+lenf
);i
++)
349 eb
[eblen
+i
]=newbuf
[lastscan
+lenf
+i
];
352 eblen
+=(scan
-lenb
)-(lastscan
+lenf
);
354 triple
.x
= htonl(lenf
);
355 triple
.y
= htonl((scan
-lenb
)-(lastscan
+lenf
));
356 triple
.z
= htonl((pos
-lenb
)-(lastpos
+lenf
));
357 if (write(fd
,&triple
,sizeof(triple
)) != sizeof(triple
))
360 #ifdef DEBUG_bsmedberg
361 printf("Writing a block:\n"
366 (uint32_t) ((scan
-lenb
)-(lastscan
+lenf
)),
367 (uint32_t) ((pos
-lenb
)-(lastpos
+lenf
)));
378 if(write(fd
,db
,dblen
)!=dblen
)
381 if(write(fd
,eb
,eblen
)!=eblen
)
384 header
.slen
= htonl(oldsize
);
385 header
.scrc32
= htonl(scrc
);
386 header
.dlen
= htonl(newsize
);
387 header
.cblen
= htonl(numtriples
* sizeof(MBSPatchTriple
));
388 header
.difflen
= htonl(dblen
);
389 header
.extralen
= htonl(eblen
);
391 if (lseek(fd
,0,SEEK_SET
) == -1 ||
392 write(fd
,&header
,sizeof(header
)) != sizeof(header
) ||
405 /* vim:set shiftwidth=4 softtabstop=4 expandtab: */