Fixes default log output to console for macOS
[sqlcipher.git] / ext / misc / fossildelta.c
blobe638737d2b2dfba7734caf053b06e7c1aaef4f61
1 /*
2 ** 2019-02-19
3 **
4 ** The author disclaims copyright to this source code. In place of
5 ** a legal notice, here is a blessing:
6 **
7 ** May you do good and not evil.
8 ** May you find forgiveness for yourself and forgive others.
9 ** May you share freely, never taking more than you give.
11 ******************************************************************************
13 ** This SQLite extension implements the delta functions used by the RBU
14 ** extension. Three scalar functions and one table-valued function are
15 ** implemented here:
17 ** delta_apply(X,D) -- apply delta D to file X and return the result
18 ** delta_create(X,Y) -- compute and return a delta that carries X into Y
19 ** delta_output_size(D) -- blob size in bytes output from applying delta D
20 ** delta_parse(D) -- returns rows describing delta D
22 ** The delta format is the Fossil delta format, described in a comment
23 ** on the delete_create() function implementation below, and also at
25 ** https://www.fossil-scm.org/fossil/doc/trunk/www/delta_format.wiki
27 ** This delta format is used by the RBU extension, which is the main
28 ** reason that these routines are included in the extension library.
29 ** RBU does not use this extension directly. Rather, this extension is
30 ** provided as a convenience to developers who want to analyze RBU files
31 ** that contain deltas.
33 #include <string.h>
34 #include <assert.h>
35 #include <stdlib.h>
36 #include "sqlite3ext.h"
37 SQLITE_EXTENSION_INIT1
39 #ifndef SQLITE_AMALGAMATION
41 ** The "u32" type must be an unsigned 32-bit integer. Adjust this
43 typedef unsigned int u32;
46 ** Must be a 16-bit value
48 typedef short int s16;
49 typedef unsigned short int u16;
51 #endif /* SQLITE_AMALGAMATION */
55 ** The width of a hash window in bytes. The algorithm only works if this
56 ** is a power of 2.
58 #define NHASH 16
61 ** The current state of the rolling hash.
63 ** z[] holds the values that have been hashed. z[] is a circular buffer.
64 ** z[i] is the first entry and z[(i+NHASH-1)%NHASH] is the last entry of
65 ** the window.
67 ** Hash.a is the sum of all elements of hash.z[]. Hash.b is a weighted
68 ** sum. Hash.b is z[i]*NHASH + z[i+1]*(NHASH-1) + ... + z[i+NHASH-1]*1.
69 ** (Each index for z[] should be module NHASH, of course. The %NHASH operator
70 ** is omitted in the prior expression for brevity.)
72 typedef struct hash hash;
73 struct hash {
74 u16 a, b; /* Hash values */
75 u16 i; /* Start of the hash window */
76 char z[NHASH]; /* The values that have been hashed */
80 ** Initialize the rolling hash using the first NHASH characters of z[]
82 static void hash_init(hash *pHash, const char *z){
83 u16 a, b, i;
84 a = b = z[0];
85 for(i=1; i<NHASH; i++){
86 a += z[i];
87 b += a;
89 memcpy(pHash->z, z, NHASH);
90 pHash->a = a & 0xffff;
91 pHash->b = b & 0xffff;
92 pHash->i = 0;
96 ** Advance the rolling hash by a single character "c"
98 static void hash_next(hash *pHash, int c){
99 u16 old = pHash->z[pHash->i];
100 pHash->z[pHash->i] = c;
101 pHash->i = (pHash->i+1)&(NHASH-1);
102 pHash->a = pHash->a - old + c;
103 pHash->b = pHash->b - NHASH*old + pHash->a;
107 ** Return a 32-bit hash value
109 static u32 hash_32bit(hash *pHash){
110 return (pHash->a & 0xffff) | (((u32)(pHash->b & 0xffff))<<16);
114 ** Compute a hash on NHASH bytes.
116 ** This routine is intended to be equivalent to:
117 ** hash h;
118 ** hash_init(&h, zInput);
119 ** return hash_32bit(&h);
121 static u32 hash_once(const char *z){
122 u16 a, b, i;
123 a = b = z[0];
124 for(i=1; i<NHASH; i++){
125 a += z[i];
126 b += a;
128 return a | (((u32)b)<<16);
132 ** Write an base-64 integer into the given buffer.
134 static void putInt(unsigned int v, char **pz){
135 static const char zDigits[] =
136 "0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZ_abcdefghijklmnopqrstuvwxyz~";
137 /* 123456789 123456789 123456789 123456789 123456789 123456789 123 */
138 int i, j;
139 char zBuf[20];
140 if( v==0 ){
141 *(*pz)++ = '0';
142 return;
144 for(i=0; v>0; i++, v>>=6){
145 zBuf[i] = zDigits[v&0x3f];
147 for(j=i-1; j>=0; j--){
148 *(*pz)++ = zBuf[j];
153 ** Read bytes from *pz and convert them into a positive integer. When
154 ** finished, leave *pz pointing to the first character past the end of
155 ** the integer. The *pLen parameter holds the length of the string
156 ** in *pz and is decremented once for each character in the integer.
158 static unsigned int deltaGetInt(const char **pz, int *pLen){
159 static const signed char zValue[] = {
160 -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
161 -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
162 -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
163 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, -1, -1, -1, -1, -1, -1,
164 -1, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24,
165 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, -1, -1, -1, -1, 36,
166 -1, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51,
167 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, -1, -1, -1, 63, -1,
169 unsigned int v = 0;
170 int c;
171 unsigned char *z = (unsigned char*)*pz;
172 unsigned char *zStart = z;
173 while( (c = zValue[0x7f&*(z++)])>=0 ){
174 v = (v<<6) + c;
176 z--;
177 *pLen -= z - zStart;
178 *pz = (char*)z;
179 return v;
183 ** Return the number digits in the base-64 representation of a positive integer
185 static int digit_count(int v){
186 unsigned int i, x;
187 for(i=1, x=64; v>=x; i++, x <<= 6){}
188 return i;
191 #ifdef __GNUC__
192 # define GCC_VERSION (__GNUC__*1000000+__GNUC_MINOR__*1000+__GNUC_PATCHLEVEL__)
193 #else
194 # define GCC_VERSION 0
195 #endif
198 ** Compute a 32-bit big-endian checksum on the N-byte buffer. If the
199 ** buffer is not a multiple of 4 bytes length, compute the sum that would
200 ** have occurred if the buffer was padded with zeros to the next multiple
201 ** of four bytes.
203 static unsigned int checksum(const char *zIn, size_t N){
204 static const int byteOrderTest = 1;
205 const unsigned char *z = (const unsigned char *)zIn;
206 const unsigned char *zEnd = (const unsigned char*)&zIn[N&~3];
207 unsigned sum = 0;
208 assert( (z - (const unsigned char*)0)%4==0 ); /* Four-byte alignment */
209 if( 0==*(char*)&byteOrderTest ){
210 /* This is a big-endian machine */
211 while( z<zEnd ){
212 sum += *(unsigned*)z;
213 z += 4;
215 }else{
216 /* A little-endian machine */
217 #if GCC_VERSION>=4003000
218 while( z<zEnd ){
219 sum += __builtin_bswap32(*(unsigned*)z);
220 z += 4;
222 #elif defined(_MSC_VER) && _MSC_VER>=1300
223 while( z<zEnd ){
224 sum += _byteswap_ulong(*(unsigned*)z);
225 z += 4;
227 #else
228 unsigned sum0 = 0;
229 unsigned sum1 = 0;
230 unsigned sum2 = 0;
231 while(N >= 16){
232 sum0 += ((unsigned)z[0] + z[4] + z[8] + z[12]);
233 sum1 += ((unsigned)z[1] + z[5] + z[9] + z[13]);
234 sum2 += ((unsigned)z[2] + z[6] + z[10]+ z[14]);
235 sum += ((unsigned)z[3] + z[7] + z[11]+ z[15]);
236 z += 16;
237 N -= 16;
239 while(N >= 4){
240 sum0 += z[0];
241 sum1 += z[1];
242 sum2 += z[2];
243 sum += z[3];
244 z += 4;
245 N -= 4;
247 sum += (sum2 << 8) + (sum1 << 16) + (sum0 << 24);
248 #endif
250 switch(N&3){
251 case 3: sum += (z[2] << 8);
252 case 2: sum += (z[1] << 16);
253 case 1: sum += (z[0] << 24);
254 default: ;
256 return sum;
260 ** Create a new delta.
262 ** The delta is written into a preallocated buffer, zDelta, which
263 ** should be at least 60 bytes longer than the target file, zOut.
264 ** The delta string will be NUL-terminated, but it might also contain
265 ** embedded NUL characters if either the zSrc or zOut files are
266 ** binary. This function returns the length of the delta string
267 ** in bytes, excluding the final NUL terminator character.
269 ** Output Format:
271 ** The delta begins with a base64 number followed by a newline. This
272 ** number is the number of bytes in the TARGET file. Thus, given a
273 ** delta file z, a program can compute the size of the output file
274 ** simply by reading the first line and decoding the base-64 number
275 ** found there. The delta_output_size() routine does exactly this.
277 ** After the initial size number, the delta consists of a series of
278 ** literal text segments and commands to copy from the SOURCE file.
279 ** A copy command looks like this:
281 ** NNN@MMM,
283 ** where NNN is the number of bytes to be copied and MMM is the offset
284 ** into the source file of the first byte (both base-64). If NNN is 0
285 ** it means copy the rest of the input file. Literal text is like this:
287 ** NNN:TTTTT
289 ** where NNN is the number of bytes of text (base-64) and TTTTT is the text.
291 ** The last term is of the form
293 ** NNN;
295 ** In this case, NNN is a 32-bit bigendian checksum of the output file
296 ** that can be used to verify that the delta applied correctly. All
297 ** numbers are in base-64.
299 ** Pure text files generate a pure text delta. Binary files generate a
300 ** delta that may contain some binary data.
302 ** Algorithm:
304 ** The encoder first builds a hash table to help it find matching
305 ** patterns in the source file. 16-byte chunks of the source file
306 ** sampled at evenly spaced intervals are used to populate the hash
307 ** table.
309 ** Next we begin scanning the target file using a sliding 16-byte
310 ** window. The hash of the 16-byte window in the target is used to
311 ** search for a matching section in the source file. When a match
312 ** is found, a copy command is added to the delta. An effort is
313 ** made to extend the matching section to regions that come before
314 ** and after the 16-byte hash window. A copy command is only issued
315 ** if the result would use less space that just quoting the text
316 ** literally. Literal text is added to the delta for sections that
317 ** do not match or which can not be encoded efficiently using copy
318 ** commands.
320 static int delta_create(
321 const char *zSrc, /* The source or pattern file */
322 unsigned int lenSrc, /* Length of the source file */
323 const char *zOut, /* The target file */
324 unsigned int lenOut, /* Length of the target file */
325 char *zDelta /* Write the delta into this buffer */
327 int i, base;
328 char *zOrigDelta = zDelta;
329 hash h;
330 int nHash; /* Number of hash table entries */
331 int *landmark; /* Primary hash table */
332 int *collide; /* Collision chain */
333 int lastRead = -1; /* Last byte of zSrc read by a COPY command */
335 /* Add the target file size to the beginning of the delta
337 putInt(lenOut, &zDelta);
338 *(zDelta++) = '\n';
340 /* If the source file is very small, it means that we have no
341 ** chance of ever doing a copy command. Just output a single
342 ** literal segment for the entire target and exit.
344 if( lenSrc<=NHASH ){
345 putInt(lenOut, &zDelta);
346 *(zDelta++) = ':';
347 memcpy(zDelta, zOut, lenOut);
348 zDelta += lenOut;
349 putInt(checksum(zOut, lenOut), &zDelta);
350 *(zDelta++) = ';';
351 return zDelta - zOrigDelta;
354 /* Compute the hash table used to locate matching sections in the
355 ** source file.
357 nHash = lenSrc/NHASH;
358 collide = sqlite3_malloc64( (sqlite3_int64)nHash*2*sizeof(int) );
359 memset(collide, -1, nHash*2*sizeof(int));
360 landmark = &collide[nHash];
361 for(i=0; i<lenSrc-NHASH; i+=NHASH){
362 int hv = hash_once(&zSrc[i]) % nHash;
363 collide[i/NHASH] = landmark[hv];
364 landmark[hv] = i/NHASH;
367 /* Begin scanning the target file and generating copy commands and
368 ** literal sections of the delta.
370 base = 0; /* We have already generated everything before zOut[base] */
371 while( base+NHASH<lenOut ){
372 int iSrc, iBlock;
373 unsigned int bestCnt, bestOfst=0, bestLitsz=0;
374 hash_init(&h, &zOut[base]);
375 i = 0; /* Trying to match a landmark against zOut[base+i] */
376 bestCnt = 0;
377 while( 1 ){
378 int hv;
379 int limit = 250;
381 hv = hash_32bit(&h) % nHash;
382 iBlock = landmark[hv];
383 while( iBlock>=0 && (limit--)>0 ){
385 ** The hash window has identified a potential match against
386 ** landmark block iBlock. But we need to investigate further.
388 ** Look for a region in zOut that matches zSrc. Anchor the search
389 ** at zSrc[iSrc] and zOut[base+i]. Do not include anything prior to
390 ** zOut[base] or after zOut[outLen] nor anything after zSrc[srcLen].
392 ** Set cnt equal to the length of the match and set ofst so that
393 ** zSrc[ofst] is the first element of the match. litsz is the number
394 ** of characters between zOut[base] and the beginning of the match.
395 ** sz will be the overhead (in bytes) needed to encode the copy
396 ** command. Only generate copy command if the overhead of the
397 ** copy command is less than the amount of literal text to be copied.
399 int cnt, ofst, litsz;
400 int j, k, x, y;
401 int sz;
402 int limitX;
404 /* Beginning at iSrc, match forwards as far as we can. j counts
405 ** the number of characters that match */
406 iSrc = iBlock*NHASH;
407 y = base+i;
408 limitX = ( lenSrc-iSrc <= lenOut-y ) ? lenSrc : iSrc + lenOut - y;
409 for(x=iSrc; x<limitX; x++, y++){
410 if( zSrc[x]!=zOut[y] ) break;
412 j = x - iSrc - 1;
414 /* Beginning at iSrc-1, match backwards as far as we can. k counts
415 ** the number of characters that match */
416 for(k=1; k<iSrc && k<=i; k++){
417 if( zSrc[iSrc-k]!=zOut[base+i-k] ) break;
419 k--;
421 /* Compute the offset and size of the matching region */
422 ofst = iSrc-k;
423 cnt = j+k+1;
424 litsz = i-k; /* Number of bytes of literal text before the copy */
425 /* sz will hold the number of bytes needed to encode the "insert"
426 ** command and the copy command, not counting the "insert" text */
427 sz = digit_count(i-k)+digit_count(cnt)+digit_count(ofst)+3;
428 if( cnt>=sz && cnt>bestCnt ){
429 /* Remember this match only if it is the best so far and it
430 ** does not increase the file size */
431 bestCnt = cnt;
432 bestOfst = iSrc-k;
433 bestLitsz = litsz;
436 /* Check the next matching block */
437 iBlock = collide[iBlock];
440 /* We have a copy command that does not cause the delta to be larger
441 ** than a literal insert. So add the copy command to the delta.
443 if( bestCnt>0 ){
444 if( bestLitsz>0 ){
445 /* Add an insert command before the copy */
446 putInt(bestLitsz,&zDelta);
447 *(zDelta++) = ':';
448 memcpy(zDelta, &zOut[base], bestLitsz);
449 zDelta += bestLitsz;
450 base += bestLitsz;
452 base += bestCnt;
453 putInt(bestCnt, &zDelta);
454 *(zDelta++) = '@';
455 putInt(bestOfst, &zDelta);
456 *(zDelta++) = ',';
457 if( bestOfst + bestCnt -1 > lastRead ){
458 lastRead = bestOfst + bestCnt - 1;
460 bestCnt = 0;
461 break;
464 /* If we reach this point, it means no match is found so far */
465 if( base+i+NHASH>=lenOut ){
466 /* We have reached the end of the file and have not found any
467 ** matches. Do an "insert" for everything that does not match */
468 putInt(lenOut-base, &zDelta);
469 *(zDelta++) = ':';
470 memcpy(zDelta, &zOut[base], lenOut-base);
471 zDelta += lenOut-base;
472 base = lenOut;
473 break;
476 /* Advance the hash by one character. Keep looking for a match */
477 hash_next(&h, zOut[base+i+NHASH]);
478 i++;
481 /* Output a final "insert" record to get all the text at the end of
482 ** the file that does not match anything in the source file.
484 if( base<lenOut ){
485 putInt(lenOut-base, &zDelta);
486 *(zDelta++) = ':';
487 memcpy(zDelta, &zOut[base], lenOut-base);
488 zDelta += lenOut-base;
490 /* Output the final checksum record. */
491 putInt(checksum(zOut, lenOut), &zDelta);
492 *(zDelta++) = ';';
493 sqlite3_free(collide);
494 return zDelta - zOrigDelta;
498 ** Return the size (in bytes) of the output from applying
499 ** a delta.
501 ** This routine is provided so that an procedure that is able
502 ** to call delta_apply() can learn how much space is required
503 ** for the output and hence allocate nor more space that is really
504 ** needed.
506 static int delta_output_size(const char *zDelta, int lenDelta){
507 int size;
508 size = deltaGetInt(&zDelta, &lenDelta);
509 if( *zDelta!='\n' ){
510 /* ERROR: size integer not terminated by "\n" */
511 return -1;
513 return size;
518 ** Apply a delta.
520 ** The output buffer should be big enough to hold the whole output
521 ** file and a NUL terminator at the end. The delta_output_size()
522 ** routine will determine this size for you.
524 ** The delta string should be null-terminated. But the delta string
525 ** may contain embedded NUL characters (if the input and output are
526 ** binary files) so we also have to pass in the length of the delta in
527 ** the lenDelta parameter.
529 ** This function returns the size of the output file in bytes (excluding
530 ** the final NUL terminator character). Except, if the delta string is
531 ** malformed or intended for use with a source file other than zSrc,
532 ** then this routine returns -1.
534 ** Refer to the delta_create() documentation above for a description
535 ** of the delta file format.
537 static int delta_apply(
538 const char *zSrc, /* The source or pattern file */
539 int lenSrc, /* Length of the source file */
540 const char *zDelta, /* Delta to apply to the pattern */
541 int lenDelta, /* Length of the delta */
542 char *zOut /* Write the output into this preallocated buffer */
544 unsigned int limit;
545 unsigned int total = 0;
546 #ifdef FOSSIL_ENABLE_DELTA_CKSUM_TEST
547 char *zOrigOut = zOut;
548 #endif
550 limit = deltaGetInt(&zDelta, &lenDelta);
551 if( *zDelta!='\n' ){
552 /* ERROR: size integer not terminated by "\n" */
553 return -1;
555 zDelta++; lenDelta--;
556 while( *zDelta && lenDelta>0 ){
557 unsigned int cnt, ofst;
558 cnt = deltaGetInt(&zDelta, &lenDelta);
559 switch( zDelta[0] ){
560 case '@': {
561 zDelta++; lenDelta--;
562 ofst = deltaGetInt(&zDelta, &lenDelta);
563 if( lenDelta>0 && zDelta[0]!=',' ){
564 /* ERROR: copy command not terminated by ',' */
565 return -1;
567 zDelta++; lenDelta--;
568 total += cnt;
569 if( total>limit ){
570 /* ERROR: copy exceeds output file size */
571 return -1;
573 if( ofst+cnt > lenSrc ){
574 /* ERROR: copy extends past end of input */
575 return -1;
577 memcpy(zOut, &zSrc[ofst], cnt);
578 zOut += cnt;
579 break;
581 case ':': {
582 zDelta++; lenDelta--;
583 total += cnt;
584 if( total>limit ){
585 /* ERROR: insert command gives an output larger than predicted */
586 return -1;
588 if( cnt>lenDelta ){
589 /* ERROR: insert count exceeds size of delta */
590 return -1;
592 memcpy(zOut, zDelta, cnt);
593 zOut += cnt;
594 zDelta += cnt;
595 lenDelta -= cnt;
596 break;
598 case ';': {
599 zDelta++; lenDelta--;
600 zOut[0] = 0;
601 #ifdef FOSSIL_ENABLE_DELTA_CKSUM_TEST
602 if( cnt!=checksum(zOrigOut, total) ){
603 /* ERROR: bad checksum */
604 return -1;
606 #endif
607 if( total!=limit ){
608 /* ERROR: generated size does not match predicted size */
609 return -1;
611 return total;
613 default: {
614 /* ERROR: unknown delta operator */
615 return -1;
619 /* ERROR: unterminated delta */
620 return -1;
624 ** SQL functions: delta_create(X,Y)
626 ** Return a delta for carrying X into Y.
628 static void deltaCreateFunc(
629 sqlite3_context *context,
630 int argc,
631 sqlite3_value **argv
633 const char *aOrig; int nOrig; /* old blob */
634 const char *aNew; int nNew; /* new blob */
635 char *aOut; int nOut; /* output delta */
637 assert( argc==2 );
638 if( sqlite3_value_type(argv[0])==SQLITE_NULL ) return;
639 if( sqlite3_value_type(argv[1])==SQLITE_NULL ) return;
640 nOrig = sqlite3_value_bytes(argv[0]);
641 aOrig = (const char*)sqlite3_value_blob(argv[0]);
642 nNew = sqlite3_value_bytes(argv[1]);
643 aNew = (const char*)sqlite3_value_blob(argv[1]);
644 aOut = sqlite3_malloc64(nNew+70);
645 if( aOut==0 ){
646 sqlite3_result_error_nomem(context);
647 }else{
648 nOut = delta_create(aOrig, nOrig, aNew, nNew, aOut);
649 if( nOut<0 ){
650 sqlite3_free(aOut);
651 sqlite3_result_error(context, "cannot create fossil delta", -1);
652 }else{
653 sqlite3_result_blob(context, aOut, nOut, sqlite3_free);
659 ** SQL functions: delta_apply(X,D)
661 ** Return the result of applying delta D to input X.
663 static void deltaApplyFunc(
664 sqlite3_context *context,
665 int argc,
666 sqlite3_value **argv
668 const char *aOrig; int nOrig; /* The X input */
669 const char *aDelta; int nDelta; /* The input delta (D) */
670 char *aOut; int nOut, nOut2; /* The output */
672 assert( argc==2 );
673 if( sqlite3_value_type(argv[0])==SQLITE_NULL ) return;
674 if( sqlite3_value_type(argv[1])==SQLITE_NULL ) return;
675 nOrig = sqlite3_value_bytes(argv[0]);
676 aOrig = (const char*)sqlite3_value_blob(argv[0]);
677 nDelta = sqlite3_value_bytes(argv[1]);
678 aDelta = (const char*)sqlite3_value_blob(argv[1]);
680 /* Figure out the size of the output */
681 nOut = delta_output_size(aDelta, nDelta);
682 if( nOut<0 ){
683 sqlite3_result_error(context, "corrupt fossil delta", -1);
684 return;
686 aOut = sqlite3_malloc64((sqlite3_int64)nOut+1);
687 if( aOut==0 ){
688 sqlite3_result_error_nomem(context);
689 }else{
690 nOut2 = delta_apply(aOrig, nOrig, aDelta, nDelta, aOut);
691 if( nOut2!=nOut ){
692 sqlite3_free(aOut);
693 sqlite3_result_error(context, "corrupt fossil delta", -1);
694 }else{
695 sqlite3_result_blob(context, aOut, nOut, sqlite3_free);
702 ** SQL functions: delta_output_size(D)
704 ** Return the size of the output that results from applying delta D.
706 static void deltaOutputSizeFunc(
707 sqlite3_context *context,
708 int argc,
709 sqlite3_value **argv
711 const char *aDelta; int nDelta; /* The input delta (D) */
712 int nOut; /* Size of output */
713 assert( argc==1 );
714 if( sqlite3_value_type(argv[0])==SQLITE_NULL ) return;
715 nDelta = sqlite3_value_bytes(argv[0]);
716 aDelta = (const char*)sqlite3_value_blob(argv[0]);
718 /* Figure out the size of the output */
719 nOut = delta_output_size(aDelta, nDelta);
720 if( nOut<0 ){
721 sqlite3_result_error(context, "corrupt fossil delta", -1);
722 return;
723 }else{
724 sqlite3_result_int(context, nOut);
728 /*****************************************************************************
729 ** Table-valued SQL function: delta_parse(DELTA)
731 ** Schema:
733 ** CREATE TABLE delta_parse(
734 ** op TEXT,
735 ** a1 INT,
736 ** a2 ANY,
737 ** delta HIDDEN BLOB
738 ** );
740 ** Given an input DELTA, this function parses the delta and returns
741 ** rows for each entry in the delta. The op column has one of the
742 ** values SIZE, COPY, INSERT, CHECKSUM, ERROR.
744 ** Assuming no errors, the first row has op='SIZE'. a1 is the size of
745 ** the output in bytes and a2 is NULL.
747 ** After the initial SIZE row, there are zero or more 'COPY' and/or 'INSERT'
748 ** rows. A COPY row means content is copied from the source into the
749 ** output. Column a1 is the number of bytes to copy and a2 is the offset
750 ** into source from which to begin copying. An INSERT row means to
751 ** insert text into the output stream. Column a1 is the number of bytes
752 ** to insert and column is a BLOB that contains the text to be inserted.
754 ** The last row of a well-formed delta will have an op value of 'CHECKSUM'.
755 ** The a1 column will be the value of the checksum and a2 will be NULL.
757 ** If the input delta is not well-formed, then a row with an op value
758 ** of 'ERROR' is returned. The a1 value of the ERROR row is the offset
759 ** into the delta where the error was encountered and a2 is NULL.
761 typedef struct deltaparsevtab_vtab deltaparsevtab_vtab;
762 typedef struct deltaparsevtab_cursor deltaparsevtab_cursor;
763 struct deltaparsevtab_vtab {
764 sqlite3_vtab base; /* Base class - must be first */
765 /* No additional information needed */
767 struct deltaparsevtab_cursor {
768 sqlite3_vtab_cursor base; /* Base class - must be first */
769 char *aDelta; /* The delta being parsed */
770 int nDelta; /* Number of bytes in the delta */
771 int iCursor; /* Current cursor location */
772 int eOp; /* Name of current operator */
773 unsigned int a1, a2; /* Arguments to current operator */
774 int iNext; /* Next cursor value */
777 /* Operator names:
779 static const char *azOp[] = {
780 "SIZE", "COPY", "INSERT", "CHECKSUM", "ERROR", "EOF"
782 #define DELTAPARSE_OP_SIZE 0
783 #define DELTAPARSE_OP_COPY 1
784 #define DELTAPARSE_OP_INSERT 2
785 #define DELTAPARSE_OP_CHECKSUM 3
786 #define DELTAPARSE_OP_ERROR 4
787 #define DELTAPARSE_OP_EOF 5
790 ** The deltaparsevtabConnect() method is invoked to create a new
791 ** deltaparse virtual table.
793 ** Think of this routine as the constructor for deltaparsevtab_vtab objects.
795 ** All this routine needs to do is:
797 ** (1) Allocate the deltaparsevtab_vtab object and initialize all fields.
799 ** (2) Tell SQLite (via the sqlite3_declare_vtab() interface) what the
800 ** result set of queries against the virtual table will look like.
802 static int deltaparsevtabConnect(
803 sqlite3 *db,
804 void *pAux,
805 int argc, const char *const*argv,
806 sqlite3_vtab **ppVtab,
807 char **pzErr
809 deltaparsevtab_vtab *pNew;
810 int rc;
812 rc = sqlite3_declare_vtab(db,
813 "CREATE TABLE x(op,a1,a2,delta HIDDEN)"
815 /* For convenience, define symbolic names for the index to each column. */
816 #define DELTAPARSEVTAB_OP 0
817 #define DELTAPARSEVTAB_A1 1
818 #define DELTAPARSEVTAB_A2 2
819 #define DELTAPARSEVTAB_DELTA 3
820 if( rc==SQLITE_OK ){
821 pNew = sqlite3_malloc64( sizeof(*pNew) );
822 *ppVtab = (sqlite3_vtab*)pNew;
823 if( pNew==0 ) return SQLITE_NOMEM;
824 memset(pNew, 0, sizeof(*pNew));
825 sqlite3_vtab_config(db, SQLITE_VTAB_INNOCUOUS);
827 return rc;
831 ** This method is the destructor for deltaparsevtab_vtab objects.
833 static int deltaparsevtabDisconnect(sqlite3_vtab *pVtab){
834 deltaparsevtab_vtab *p = (deltaparsevtab_vtab*)pVtab;
835 sqlite3_free(p);
836 return SQLITE_OK;
840 ** Constructor for a new deltaparsevtab_cursor object.
842 static int deltaparsevtabOpen(sqlite3_vtab *p, sqlite3_vtab_cursor **ppCursor){
843 deltaparsevtab_cursor *pCur;
844 pCur = sqlite3_malloc( sizeof(*pCur) );
845 if( pCur==0 ) return SQLITE_NOMEM;
846 memset(pCur, 0, sizeof(*pCur));
847 *ppCursor = &pCur->base;
848 return SQLITE_OK;
852 ** Destructor for a deltaparsevtab_cursor.
854 static int deltaparsevtabClose(sqlite3_vtab_cursor *cur){
855 deltaparsevtab_cursor *pCur = (deltaparsevtab_cursor*)cur;
856 sqlite3_free(pCur->aDelta);
857 sqlite3_free(pCur);
858 return SQLITE_OK;
863 ** Advance a deltaparsevtab_cursor to its next row of output.
865 static int deltaparsevtabNext(sqlite3_vtab_cursor *cur){
866 deltaparsevtab_cursor *pCur = (deltaparsevtab_cursor*)cur;
867 const char *z;
868 int i = 0;
870 pCur->iCursor = pCur->iNext;
871 z = pCur->aDelta + pCur->iCursor;
872 pCur->a1 = deltaGetInt(&z, &i);
873 switch( z[0] ){
874 case '@': {
875 z++;
876 pCur->a2 = deltaGetInt(&z, &i);
877 pCur->eOp = DELTAPARSE_OP_COPY;
878 pCur->iNext = (int)(&z[1] - pCur->aDelta);
879 break;
881 case ':': {
882 z++;
883 pCur->a2 = (unsigned int)(z - pCur->aDelta);
884 pCur->eOp = DELTAPARSE_OP_INSERT;
885 pCur->iNext = (int)(&z[pCur->a1] - pCur->aDelta);
886 break;
888 case ';': {
889 pCur->eOp = DELTAPARSE_OP_CHECKSUM;
890 pCur->iNext = pCur->nDelta;
891 break;
893 default: {
894 if( pCur->iNext==pCur->nDelta ){
895 pCur->eOp = DELTAPARSE_OP_EOF;
896 }else{
897 pCur->eOp = DELTAPARSE_OP_ERROR;
898 pCur->iNext = pCur->nDelta;
900 break;
903 return SQLITE_OK;
907 ** Return values of columns for the row at which the deltaparsevtab_cursor
908 ** is currently pointing.
910 static int deltaparsevtabColumn(
911 sqlite3_vtab_cursor *cur, /* The cursor */
912 sqlite3_context *ctx, /* First argument to sqlite3_result_...() */
913 int i /* Which column to return */
915 deltaparsevtab_cursor *pCur = (deltaparsevtab_cursor*)cur;
916 switch( i ){
917 case DELTAPARSEVTAB_OP: {
918 sqlite3_result_text(ctx, azOp[pCur->eOp], -1, SQLITE_STATIC);
919 break;
921 case DELTAPARSEVTAB_A1: {
922 sqlite3_result_int(ctx, pCur->a1);
923 break;
925 case DELTAPARSEVTAB_A2: {
926 if( pCur->eOp==DELTAPARSE_OP_COPY ){
927 sqlite3_result_int(ctx, pCur->a2);
928 }else if( pCur->eOp==DELTAPARSE_OP_INSERT ){
929 sqlite3_result_blob(ctx, pCur->aDelta+pCur->a2, pCur->a1,
930 SQLITE_TRANSIENT);
932 break;
934 case DELTAPARSEVTAB_DELTA: {
935 sqlite3_result_blob(ctx, pCur->aDelta, pCur->nDelta, SQLITE_TRANSIENT);
936 break;
939 return SQLITE_OK;
943 ** Return the rowid for the current row. In this implementation, the
944 ** rowid is the same as the output value.
946 static int deltaparsevtabRowid(sqlite3_vtab_cursor *cur, sqlite_int64 *pRowid){
947 deltaparsevtab_cursor *pCur = (deltaparsevtab_cursor*)cur;
948 *pRowid = pCur->iCursor;
949 return SQLITE_OK;
953 ** Return TRUE if the cursor has been moved off of the last
954 ** row of output.
956 static int deltaparsevtabEof(sqlite3_vtab_cursor *cur){
957 deltaparsevtab_cursor *pCur = (deltaparsevtab_cursor*)cur;
958 return pCur->eOp==DELTAPARSE_OP_EOF;
962 ** This method is called to "rewind" the deltaparsevtab_cursor object back
963 ** to the first row of output. This method is always called at least
964 ** once prior to any call to deltaparsevtabColumn() or deltaparsevtabRowid() or
965 ** deltaparsevtabEof().
967 static int deltaparsevtabFilter(
968 sqlite3_vtab_cursor *pVtabCursor,
969 int idxNum, const char *idxStr,
970 int argc, sqlite3_value **argv
972 deltaparsevtab_cursor *pCur = (deltaparsevtab_cursor *)pVtabCursor;
973 const char *a;
974 int i = 0;
975 pCur->eOp = DELTAPARSE_OP_ERROR;
976 if( idxNum!=1 ){
977 return SQLITE_OK;
979 pCur->nDelta = sqlite3_value_bytes(argv[0]);
980 a = (const char*)sqlite3_value_blob(argv[0]);
981 if( pCur->nDelta==0 || a==0 ){
982 return SQLITE_OK;
984 pCur->aDelta = sqlite3_malloc64( pCur->nDelta+1 );
985 if( pCur->aDelta==0 ){
986 pCur->nDelta = 0;
987 return SQLITE_NOMEM;
989 memcpy(pCur->aDelta, a, pCur->nDelta);
990 pCur->aDelta[pCur->nDelta] = 0;
991 a = pCur->aDelta;
992 pCur->eOp = DELTAPARSE_OP_SIZE;
993 pCur->a1 = deltaGetInt(&a, &i);
994 if( a[0]!='\n' ){
995 pCur->eOp = DELTAPARSE_OP_ERROR;
996 pCur->a1 = pCur->a2 = 0;
997 pCur->iNext = pCur->nDelta;
998 return SQLITE_OK;
1000 a++;
1001 pCur->iNext = (unsigned int)(a - pCur->aDelta);
1002 return SQLITE_OK;
1006 ** SQLite will invoke this method one or more times while planning a query
1007 ** that uses the virtual table. This routine needs to create
1008 ** a query plan for each invocation and compute an estimated cost for that
1009 ** plan.
1011 static int deltaparsevtabBestIndex(
1012 sqlite3_vtab *tab,
1013 sqlite3_index_info *pIdxInfo
1015 int i;
1016 for(i=0; i<pIdxInfo->nConstraint; i++){
1017 if( pIdxInfo->aConstraint[i].iColumn != DELTAPARSEVTAB_DELTA ) continue;
1018 if( pIdxInfo->aConstraint[i].usable==0 ) continue;
1019 if( pIdxInfo->aConstraint[i].op!=SQLITE_INDEX_CONSTRAINT_EQ ) continue;
1020 pIdxInfo->aConstraintUsage[i].argvIndex = 1;
1021 pIdxInfo->aConstraintUsage[i].omit = 1;
1022 pIdxInfo->estimatedCost = (double)1;
1023 pIdxInfo->estimatedRows = 10;
1024 pIdxInfo->idxNum = 1;
1025 return SQLITE_OK;
1027 pIdxInfo->idxNum = 0;
1028 pIdxInfo->estimatedCost = (double)0x7fffffff;
1029 pIdxInfo->estimatedRows = 0x7fffffff;
1030 return SQLITE_CONSTRAINT;
1034 ** This following structure defines all the methods for the
1035 ** virtual table.
1037 static sqlite3_module deltaparsevtabModule = {
1038 /* iVersion */ 0,
1039 /* xCreate */ 0,
1040 /* xConnect */ deltaparsevtabConnect,
1041 /* xBestIndex */ deltaparsevtabBestIndex,
1042 /* xDisconnect */ deltaparsevtabDisconnect,
1043 /* xDestroy */ 0,
1044 /* xOpen */ deltaparsevtabOpen,
1045 /* xClose */ deltaparsevtabClose,
1046 /* xFilter */ deltaparsevtabFilter,
1047 /* xNext */ deltaparsevtabNext,
1048 /* xEof */ deltaparsevtabEof,
1049 /* xColumn */ deltaparsevtabColumn,
1050 /* xRowid */ deltaparsevtabRowid,
1051 /* xUpdate */ 0,
1052 /* xBegin */ 0,
1053 /* xSync */ 0,
1054 /* xCommit */ 0,
1055 /* xRollback */ 0,
1056 /* xFindMethod */ 0,
1057 /* xRename */ 0,
1058 /* xSavepoint */ 0,
1059 /* xRelease */ 0,
1060 /* xRollbackTo */ 0,
1061 /* xShadowName */ 0,
1062 /* xIntegrity */ 0
1067 #ifdef _WIN32
1068 __declspec(dllexport)
1069 #endif
1070 int sqlite3_fossildelta_init(
1071 sqlite3 *db,
1072 char **pzErrMsg,
1073 const sqlite3_api_routines *pApi
1075 static const int enc = SQLITE_UTF8|SQLITE_INNOCUOUS;
1076 int rc = SQLITE_OK;
1077 SQLITE_EXTENSION_INIT2(pApi);
1078 (void)pzErrMsg; /* Unused parameter */
1079 rc = sqlite3_create_function(db, "delta_create", 2, enc, 0,
1080 deltaCreateFunc, 0, 0);
1081 if( rc==SQLITE_OK ){
1082 rc = sqlite3_create_function(db, "delta_apply", 2, enc, 0,
1083 deltaApplyFunc, 0, 0);
1085 if( rc==SQLITE_OK ){
1086 rc = sqlite3_create_function(db, "delta_output_size", 1, enc, 0,
1087 deltaOutputSizeFunc, 0, 0);
1089 if( rc==SQLITE_OK ){
1090 rc = sqlite3_create_module(db, "delta_parse", &deltaparsevtabModule, 0);
1092 return rc;