Version 7.6.3.2-android, tag libreoffice-7.6.3.2-android
[LibreOffice.git] / onlineupdate / source / mbsdiff / bsdiff.cxx
blob72bbcde1899f25497f0ed2e6fa24129cac970b37
1 /* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
2 /*
3 bsdiff.c -- Binary patch generator.
5 Copyright 2003 Colin Percival
7 For the terms under which this work may be distributed, please see
8 the adjoining file "LICENSE".
10 ChangeLog:
11 2005-05-05 - Use the modified header struct from bspatch.h; use 32-bit
12 values throughout.
13 --Benjamin Smedberg <benjamin@smedbergs.us>
14 2005-05-18 - Use the same CRC algorithm as bzip2, and leverage the CRC table
15 provided by libbz2.
16 --Darin Fisher <darin@meer.net>
19 #include "bspatch.h"
21 #include <stdlib.h>
22 #include <stdio.h>
23 #include <string.h>
24 #include <fcntl.h>
25 #include <stdarg.h>
26 #ifdef _WIN32
27 #include <io.h>
28 #include <winsock2.h>
29 #else
30 #include <unistd.h>
31 #include <arpa/inet.h>
32 #define _O_BINARY 0
33 #endif
35 #undef MIN
36 #define MIN(x,y) (((x)<(y)) ? (x) : (y))
38 /*---------------------------------------------------------------------------*/
40 /* This variable lives in libbz2. It's declared in bzlib_private.h, so we just
41 * declare it here to avoid including that entire header file.
43 extern "C" unsigned int BZ2_crc32Table[256];
45 static unsigned int
46 crc32(const unsigned char *buf, unsigned int len)
48 unsigned int crc = 0xffffffffL;
50 const unsigned char *end = buf + len;
51 for (; buf != end; ++buf)
52 crc = (crc << 8) ^ BZ2_crc32Table[(crc >> 24) ^ *buf];
54 crc = ~crc;
55 return crc;
58 /*---------------------------------------------------------------------------*/
60 static void
61 reporterr(int e, const char *fmt, ...)
63 if (fmt) {
64 va_list args;
65 va_start(args, fmt);
66 vfprintf(stderr, fmt, args);
67 va_end(args);
70 exit(e);
73 static void
74 split(int32_t *I,int32_t *V,int32_t start,int32_t len,int32_t h)
76 int32_t i,j,k,x,tmp,jj,kk;
78 if(len<16) {
79 for(k=start;k<start+len;k+=j) {
80 j=1;x=V[I[k]+h];
81 for(i=1;k+i<start+len;i++) {
82 if(V[I[k+i]+h]<x) {
83 x=V[I[k+i]+h];
84 j=0;
86 if(V[I[k+i]+h]==x) {
87 tmp=I[k+j];I[k+j]=I[k+i];I[k+i]=tmp;
88 j++;
91 for(i=0;i<j;i++) V[I[k+i]]=k+j-1;
92 if(j==1) I[k]=-1;
94 return;
97 x=V[I[start+len/2]+h];
98 jj=0;kk=0;
99 for(i=start;i<start+len;i++) {
100 if(V[I[i]+h]<x) jj++;
101 if(V[I[i]+h]==x) kk++;
103 jj+=start;kk+=jj;
105 i=start;j=0;k=0;
106 while(i<jj) {
107 if(V[I[i]+h]<x) {
108 i++;
109 } else if(V[I[i]+h]==x) {
110 tmp=I[i];I[i]=I[jj+j];I[jj+j]=tmp;
111 j++;
112 } else {
113 tmp=I[i];I[i]=I[kk+k];I[kk+k]=tmp;
114 k++;
118 while(jj+j<kk) {
119 if(V[I[jj+j]+h]==x) {
120 j++;
121 } else {
122 tmp=I[jj+j];I[jj+j]=I[kk+k];I[kk+k]=tmp;
123 k++;
127 if(jj>start) split(I,V,start,jj-start,h);
129 for(i=0;i<kk-jj;i++) V[I[jj+i]]=kk-1;
130 if(jj==kk-1) I[jj]=-1;
132 if(start+len>kk) split(I,V,kk,start+len-kk,h);
135 static void
136 qsufsort(int32_t *I,int32_t *V,unsigned char *old,int32_t oldsize)
138 int32_t buckets[256];
139 int32_t i,h,len;
141 for(i=0;i<256;i++) buckets[i]=0;
142 for(i=0;i<oldsize;i++) buckets[old[i]]++;
143 for(i=1;i<256;i++) buckets[i]+=buckets[i-1];
144 for(i=255;i>0;i--) buckets[i]=buckets[i-1];
145 buckets[0]=0;
147 for(i=0;i<oldsize;i++) I[++buckets[old[i]]]=i;
148 I[0]=oldsize;
149 for(i=0;i<oldsize;i++) V[i]=buckets[old[i]];
150 V[oldsize]=0;
151 for(i=1;i<256;i++) if(buckets[i]==buckets[i-1]+1) I[buckets[i]]=-1;
152 I[0]=-1;
154 for(h=1;I[0]!=-(oldsize+1);h+=h) {
155 len=0;
156 for(i=0;i<oldsize+1;) {
157 if(I[i]<0) {
158 len-=I[i];
159 i-=I[i];
160 } else {
161 if(len) I[i-len]=-len;
162 len=V[I[i]]+1-i;
163 split(I,V,i,len,h);
164 i+=len;
165 len=0;
168 if(len) I[i-len]=-len;
171 for(i=0;i<oldsize+1;i++) I[V[i]]=i;
174 static int32_t
175 matchlen(unsigned char *old,int32_t oldsize,unsigned char *newbuf,int32_t newsize)
177 int32_t i;
179 for(i=0;(i<oldsize)&&(i<newsize);i++)
180 if(old[i]!=newbuf[i]) break;
182 return i;
185 static int32_t
186 search(int32_t *I,unsigned char *old,int32_t oldsize,
187 unsigned char *newbuf,int32_t newsize,int32_t st,int32_t en,int32_t *pos)
189 int32_t x,y;
191 if(en-st<2) {
192 x=matchlen(old+I[st],oldsize-I[st],newbuf,newsize);
193 y=matchlen(old+I[en],oldsize-I[en],newbuf,newsize);
195 if(x>y) {
196 *pos=I[st];
197 return x;
198 } else {
199 *pos=I[en];
200 return y;
204 x=st+(en-st)/2;
205 if(memcmp(old+I[x],newbuf,MIN(oldsize-I[x],newsize))<0) {
206 return search(I,old,oldsize,newbuf,newsize,x,en,pos);
207 } else {
208 return search(I,old,oldsize,newbuf,newsize,st,x,pos);
212 int main(int argc,char *argv[])
214 int fd;
215 unsigned char *old = nullptr,*newbuf = nullptr;
216 int32_t oldsize = 0, newsize = 0;
217 int32_t *I = nullptr,*V = nullptr;
219 int32_t scan,pos = 0,len;
220 int32_t lastscan,lastpos,lastoffset;
221 int32_t oldscore,scsc;
223 int32_t s,Sf,lenf,Sb,lenb;
224 int32_t overlap,Ss,lens;
225 int32_t i;
227 int32_t dblen,eblen;
228 unsigned char *db,*eb = nullptr;
230 unsigned int scrc;
232 MBSPatchHeader header = {
233 {'M','B','D','I','F','F','1','0'},
234 0, 0, 0, 0, 0, 0
237 uint32_t numtriples;
239 if(argc!=4)
240 reporterr(1,"usage: %s <oldfile> <newfile> <patchfile>\n",argv[0]);
242 /* Allocate oldsize+1 bytes instead of oldsize bytes to ensure
243 that we never try to malloc(0) and get a NULL pointer */
244 if(((fd=open(argv[1],O_RDONLY|_O_BINARY,0))<0) ||
245 ((oldsize=lseek(fd,0,SEEK_END))==-1) ||
246 ((old=(unsigned char*) malloc(oldsize+1))==NULL) ||
247 (lseek(fd,0,SEEK_SET)!=0) ||
248 (read(fd,old,oldsize)!=oldsize) ||
249 (close(fd)==-1))
250 reporterr(1,"%s\n",argv[1]);
252 scrc = crc32(old, oldsize);
254 if(((I=(int32_t*) malloc((oldsize+1)*sizeof(int32_t)))==NULL) ||
255 ((V=(int32_t*) malloc((oldsize+1)*sizeof(int32_t)))==NULL))
256 reporterr(1,NULL);
258 qsufsort(I,V,old,oldsize);
260 free(V);
262 /* Allocate newsize+1 bytes instead of newsize bytes to ensure
263 that we never try to malloc(0) and get a NULL pointer */
264 if(((fd=open(argv[2],O_RDONLY|_O_BINARY,0))<0) ||
265 ((newsize=lseek(fd,0,SEEK_END))==-1) ||
266 ((newbuf=(unsigned char*) malloc(newsize+1))==NULL) ||
267 (lseek(fd,0,SEEK_SET)!=0) ||
268 (read(fd,newbuf,newsize)!=newsize) ||
269 (close(fd)==-1)) reporterr(1,"%s\n",argv[2]);
271 if(((db=(unsigned char*) malloc(newsize+1))==NULL) ||
272 ((eb=(unsigned char*) malloc(newsize+1))==NULL))
273 reporterr(1,NULL);
275 dblen=0;
276 eblen=0;
278 if((fd=open(argv[3],O_CREAT|O_TRUNC|O_WRONLY|_O_BINARY,0666))<0)
279 reporterr(1,"%s\n",argv[3]);
281 /* start writing here */
283 /* We don't know the lengths yet, so we will write the header again
284 at the end */
286 if(write(fd,&header,sizeof(MBSPatchHeader))!=sizeof(MBSPatchHeader))
287 reporterr(1,"%s\n",argv[3]);
289 scan=0;len=0;
290 lastscan=0;lastpos=0;lastoffset=0;
291 numtriples = 0;
292 while(scan<newsize) {
293 oldscore=0;
295 for(scsc=scan+=len;scan<newsize;scan++) {
296 len=search(I,old,oldsize,newbuf+scan,newsize-scan,
297 0,oldsize,&pos);
299 for(;scsc<scan+len;scsc++)
300 if((scsc+lastoffset<oldsize) &&
301 (old[scsc+lastoffset] == newbuf[scsc]))
302 oldscore++;
304 if(((len==oldscore) && (len!=0)) ||
305 (len>oldscore+8)) break;
307 if((scan+lastoffset<oldsize) &&
308 (old[scan+lastoffset] == newbuf[scan]))
309 oldscore--;
312 if((len!=oldscore) || (scan==newsize)) {
313 MBSPatchTriple triple;
315 s=0;Sf=0;lenf=0;
316 for(i=0;(lastscan+i<scan)&&(lastpos+i<oldsize);) {
317 if(old[lastpos+i]==newbuf[lastscan+i]) s++;
318 i++;
319 if(s*2-i>Sf*2-lenf) { Sf=s; lenf=i; };
322 lenb=0;
323 if(scan<newsize) {
324 s=0;Sb=0;
325 for(i=1;(scan>=lastscan+i)&&(pos>=i);i++) {
326 if(old[pos-i]==newbuf[scan-i]) s++;
327 if(s*2-i>Sb*2-lenb) { Sb=s; lenb=i; };
331 if(lastscan+lenf>scan-lenb) {
332 overlap=(lastscan+lenf)-(scan-lenb);
333 s=0;Ss=0;lens=0;
334 for(i=0;i<overlap;i++) {
335 if(newbuf[lastscan+lenf-overlap+i]==
336 old[lastpos+lenf-overlap+i]) s++;
337 if(newbuf[scan-lenb+i]==
338 old[pos-lenb+i]) s--;
339 if(s>Ss) { Ss=s; lens=i+1; };
342 lenf+=lens-overlap;
343 lenb-=lens;
346 for(i=0;i<lenf;i++)
347 db[dblen+i]=newbuf[lastscan+i]-old[lastpos+i];
348 for(i=0;i<(scan-lenb)-(lastscan+lenf);i++)
349 eb[eblen+i]=newbuf[lastscan+lenf+i];
351 dblen+=lenf;
352 eblen+=(scan-lenb)-(lastscan+lenf);
354 triple.x = htonl(lenf);
355 triple.y = htonl((scan-lenb)-(lastscan+lenf));
356 triple.z = htonl((pos-lenb)-(lastpos+lenf));
357 if (write(fd,&triple,sizeof(triple)) != sizeof(triple))
358 reporterr(1,NULL);
360 #ifdef DEBUG_bsmedberg
361 printf("Writing a block:\n"
362 " X: %u\n"
363 " Y: %u\n"
364 " Z: %i\n",
365 (uint32_t) lenf,
366 (uint32_t) ((scan-lenb)-(lastscan+lenf)),
367 (uint32_t) ((pos-lenb)-(lastpos+lenf)));
368 #endif
370 ++numtriples;
372 lastscan=scan-lenb;
373 lastpos=pos-lenb;
374 lastoffset=pos-scan;
378 if(write(fd,db,dblen)!=dblen)
379 reporterr(1,NULL);
381 if(write(fd,eb,eblen)!=eblen)
382 reporterr(1,NULL);
384 header.slen = htonl(oldsize);
385 header.scrc32 = htonl(scrc);
386 header.dlen = htonl(newsize);
387 header.cblen = htonl(numtriples * sizeof(MBSPatchTriple));
388 header.difflen = htonl(dblen);
389 header.extralen = htonl(eblen);
391 if (lseek(fd,0,SEEK_SET) == -1 ||
392 write(fd,&header,sizeof(header)) != sizeof(header) ||
393 close(fd) == -1)
394 reporterr(1,NULL);
396 free(db);
397 free(eb);
398 free(I);
399 free(old);
400 free(newbuf);
402 return 0;
405 /* vim:set shiftwidth=4 softtabstop=4 expandtab: */