Properly support 'merged' tail block
[httpd-crcsyncproxy.git] / crccache / mod_crccache_server.c
blob1f18f47c88a8d877b38a2f5f2700005ed4eae0fc
1 /* Licensed to the Apache Software Foundation (ASF) under one or more
2 * contributor license agreements. See the NOTICE file distributed with
3 * this work for additional information regarding copyright ownership.
4 * The ASF licenses this file to You under the Apache License, Version 2.0
5 * (the "License"); you may not use this file except in compliance with
6 * the License. You may obtain a copy of the License at
8 * http://www.apache.org/licenses/LICENSE-2.0
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
17 /* crcsync/crccache apache server module
19 * This module is designed to run as a proxy server on the remote end of a slow
20 * internet link. This module uses a crc32 running hash algorithm to reduce
21 * data transfer in cached but modified downstream files.
23 * CRC algorithm uses the crcsync library created by Rusty Russell
25 * Authors: Toby Collett (2009), Alex Wulms (2009)
29 #include <stdbool.h>
31 #include <apr_file_io.h>
32 #include <apr_strings.h>
33 #include <apr_base64.h>
35 #include "ap_provider.h"
37 #include "util_filter.h"
38 #include "util_script.h"
39 #include "util_charset.h"
41 #include <http_log.h>
42 #include "ap_wrapper.h"
44 #include "crccache.h"
45 #include "mod_crccache_server.h"
47 #include <crcsync/crcsync.h>
48 #include <zlib.h>
49 #include <openssl/evp.h>
/* Forward declaration: the configuration and header-parser handlers look up
 * their per-server config through this module record before it is defined. */
51 module AP_MODULE_DECLARE_DATA crccache_server_module;
/* Possible states of the zlib stream that compresses literal (non-matched) data. */
typedef enum {
    /* Nothing is pending: state after initialisation and after every flush. */
    COMPRESSION_BUFFER_EMPTY,
    /* Literal data has been handed to the stream since the last flush. */
    COMPRESSION_FIRST_DATA_RECEIVED,
    /* A compressed chunk (carrying the section header byte) has been emitted. */
    COMPRESSION_FIRST_BLOCK_WRITTEN,
    /* deflateEnd() has been called; the stream must not be used any more. */
    COMPRESSION_ENDED
} compression_state_t;
61 //#define MIN(X,Y) (X<Y?X:Y)
63 static void *crccache_server_create_config(apr_pool_t *p, server_rec *s) {
64 crccache_server_conf *conf = apr_pcalloc(p, sizeof(crccache_server_conf));
65 return conf;
68 typedef struct crccache_ctx_t {
69 unsigned char *buffer;
70 size_t buffer_digest_getpos;
71 size_t buffer_read_getpos;
72 size_t buffer_putpos;
73 size_t buffer_size;
74 long crc_read_block_result;
75 size_t crc_read_block_ndigested;
76 apr_bucket_brigade *bb;
77 unsigned block_count;
78 size_t block_size;
79 size_t tail_block_size;
80 uint64_t *hashes;
81 struct crc_context *crcctx;
82 size_t orig_length;
83 size_t tx_length;
84 size_t tx_uncompressed_length;
85 compression_state_t compression_state;
86 z_stream *compression_stream;
87 EVP_MD_CTX mdctx;
88 int debug_skip_writing; // ____
89 } crccache_ctx;
93 * Only enable CRCCache Server when requested through the config file
94 * so that the user can switch CRCCache server on in a specific virtual server
96 static const char *set_crccache_server(cmd_parms *parms, void *dummy, int flag)
98 crccache_server_conf *conf = ap_get_module_config(parms->server->module_config,
99 &crccache_server_module);
100 conf->enabled = flag;
101 return NULL;
104 static const command_rec crccache_server_cmds[] =
106 AP_INIT_FLAG("CRCcacheServer", set_crccache_server, NULL, RSRC_CONF, "Enable the CRCCache server in this virtual server"),
107 { NULL }
/* Registration record of the CRCCACHE_OUT output filter. It is assigned when
 * the hooks are registered and used by the header parser to insert the filter. */
110 static ap_filter_rec_t *crccache_out_filter_handle;
112 int decode_if_block_header(const char * header, int * version, size_t * file_size, char ** hashes)
114 *version = 1;
115 *file_size = 0;
116 *hashes = NULL; // this will be allocated below, make sure we free it
117 int start = 0;
118 int ii;
119 size_t headerlen = strlen(header);
120 for (ii = 0; ii < headerlen;++ii)
122 if (header[ii] == ',' || ii == headerlen-1)
124 sscanf(&header[start]," v=%d",version);
125 sscanf(&header[start]," h=%as",hashes);
126 sscanf(&header[start]," fs=%zu",file_size);
127 start = ii + 1;
131 if (*hashes == NULL)
133 ap_log_error(APLOG_MARK, APLOG_ERR, 0, NULL, "CRCCACHE-ENCODE no hashes reported in header");
134 return -1;
136 if (*version != 1)
138 ap_log_error(APLOG_MARK, APLOG_ERR, 0, NULL, "CRCCACHE-ENCODE Unsupported header version, %d",*version);
139 free(*hashes);
140 *hashes = NULL;
141 return -1;
143 if (*file_size == 0)
145 ap_log_error(APLOG_MARK, APLOG_ERR, 0, NULL, "CRCCACHE-ENCODE no file size reported in header");
146 free(*hashes);
147 *hashes = NULL;
148 return -1;
150 return 0;
153 static int crccache_server_header_parser_handler(request_rec *r) {
154 crccache_server_conf *conf = ap_get_module_config(r->server->module_config,
155 &crccache_server_module);
156 if (conf->enabled)
158 ap_log_error(APLOG_MARK, APLOG_DEBUG, 0, r->server, "CRCCACHE-ENCODE Checking for headers");
159 const char * header;
160 header = apr_table_get(r->headers_in, BLOCK_HEADER);
161 if (header)
163 int version;
164 size_t file_size;
165 char * hashes;
166 if (decode_if_block_header(header,&version,&file_size,&hashes) < 0)
168 // failed to decode if block header so just process request normally
169 return OK;
171 ap_log_error(APLOG_MARK, APLOG_DEBUG, 0, r->server, "CRCCACHE-ENCODE Block Hashes header found so enabling protocol: %s",hashes);
172 free (hashes);
173 hashes = NULL;
174 // TODO: save etag and content-encoding headers before INFLATE modifies them, for later reuse in this module itself
175 // (to construct etag-crcsync-<original-encoding> header) (is it possible to pass info from here to the main module?)
176 // Insert mod_deflate's INFLATE filter in the chain to unzip content
177 // so that there is clear text available for the delta algorithm
178 ap_filter_t *inflate_filter = ap_add_output_filter("INFLATE", NULL, r, r->connection);
179 if (inflate_filter == NULL)
181 ap_log_error(APLOG_MARK, APLOG_WARNING, APR_SUCCESS, r->server, "CRCCACHE-ENCODE Could not enable INFLATE filter. Will be unable to handle deflated encoded content");
183 else
185 ap_log_error(APLOG_MARK, APLOG_DEBUG, APR_SUCCESS, r->server, "CRCCACHE-ENCODE Successfully enabled INFLATE filter to handle deflated content");
187 // And the crccache filter itself ofcourse
188 ap_add_output_filter_handle(crccache_out_filter_handle,
189 NULL, r, r->connection);
192 else
194 ap_log_error(APLOG_MARK, APLOG_DEBUG, 0, r->server, "CRCCACHE-ENCODE Did not detect blockheader (%s)", BLOCK_HEADER);
197 /* // All is okay, so set response header to IM Used
198 ap_log_error(APLOG_MARK, APLOG_DEBUG, APR_SUCCESS, r->server, "CRCCACHE-ENCODE Setting 226 header");
199 r->status=226;
200 r->status_line="226 IM Used";
201 return 226;*/
203 return OK;
206 /*static int crccache_server_header_filter_handler(ap_filter_t *f, apr_bucket_brigade *b) {
207 //request_rec *r)
208 request_rec *r = f->r;
210 ap_log_error(APLOG_MARK, APLOG_DEBUG, APR_SUCCESS, r->server,"CRCCACHE-ENCODE Setting return status code");
212 // All is okay, so set response header to IM Used
213 r->status=226;
214 r->status_line="HTTP/1.1 226 IM Used";
215 return 226;
218 /* TODO: change etag as per document (e.g. append '-crcsync-<original-encoding>' to the etag header)
220 static void crccache_check_etag(request_rec *r, const char *transform) {
221 const char *etag = apr_table_get(r->headers_out, "ETag");
222 if (etag && (((etag[0] != 'W') && (etag[0] != 'w')) || (etag[1] != '/'))) {
223 apr_table_set(r->headers_out, "ETag", apr_pstrcat(r->pool, etag, "-",
224 transform, NULL));
228 static apr_status_t write_compress_buffer(ap_filter_t *f, int flush)
230 unsigned char compress_buf[30000];
231 request_rec *r = f->r;
232 crccache_ctx *ctx = f->ctx;
233 z_stream *strm = ctx->compression_stream;
235 if (ctx->debug_skip_writing)
236 return APR_SUCCESS;
240 strm->avail_out = sizeof(compress_buf);
241 strm->next_out = compress_buf;
242 uInt avail_in_pre_deflate = strm->avail_in;
243 int zRC = deflate(strm, flush);
244 if (zRC == Z_STREAM_ERROR)
246 ap_log_error(APLOG_MARK, APLOG_ERR, APR_EGENERAL, r->server,"CRCCACHE-ENCODE deflate error: %d", zRC);
247 return APR_EGENERAL;
249 int have = sizeof(compress_buf) - strm->avail_out;
250 ap_log_error(APLOG_MARK, APLOG_DEBUG, APR_SUCCESS, r->server,
251 "CRCCACHE-ENCODE deflate rslt %d, flush %d, consumed %d, produced %d",
252 zRC, flush, avail_in_pre_deflate - strm->avail_in, have);
253 if (have != 0)
255 // output buffer contains some data to be written
256 // ap_log_hex(APLOG_MARK, APLOG_DEBUG, APR_SUCCESS, r->server, compress_buf, have);
257 unsigned bucket_size = have;
258 if (ctx->compression_state != COMPRESSION_FIRST_BLOCK_WRITTEN)
260 bucket_size += ENCODING_COMPRESSED_HEADER_SIZE;
262 ctx->tx_length += bucket_size;
263 char * buf = apr_palloc(r->pool, bucket_size);
265 if (ctx->compression_state != COMPRESSION_FIRST_BLOCK_WRITTEN)
267 buf[0] = ENCODING_COMPRESSED;
268 memcpy(buf + ENCODING_COMPRESSED_HEADER_SIZE, compress_buf, have);
269 ctx->compression_state = COMPRESSION_FIRST_BLOCK_WRITTEN;
271 else
273 memcpy(buf, compress_buf, have);
275 apr_bucket * b = apr_bucket_pool_create(buf, bucket_size, r->pool, f->c->bucket_alloc);
276 APR_BRIGADE_INSERT_TAIL(ctx->bb, b);
279 while (strm->avail_out == 0);
280 if (strm->avail_in != 0)
282 ap_log_error(APLOG_MARK, APLOG_ERR, APR_EGENERAL, r->server,"CRCCACHE-ENCODE deflate still has %d input bytes available", strm->avail_in);
283 return APR_EGENERAL;
286 return APR_SUCCESS;
290 static apr_status_t flush_compress_buffer(ap_filter_t *f)
292 crccache_ctx *ctx = f->ctx;
293 apr_status_t rslt = APR_SUCCESS; // assume all will be fine
295 if (ctx->debug_skip_writing)
296 return APR_SUCCESS;
298 if (ctx->compression_state != COMPRESSION_BUFFER_EMPTY)
300 rslt = write_compress_buffer(f, Z_FINISH); // take the real status
301 deflateReset(ctx->compression_stream);
302 ctx->compression_state = COMPRESSION_BUFFER_EMPTY;
303 // ____ ctx->debug_skip_writing = 1; // skip writing after handling first compressed block
305 return rslt;
309 * Write literal data
311 static apr_status_t write_literal(ap_filter_t *f, unsigned char *buffer, long count)
313 crccache_ctx *ctx = f->ctx;
315 if (ctx->debug_skip_writing)
316 return APR_SUCCESS;
318 apr_status_t rslt;
319 if (ctx->compression_state == COMPRESSION_BUFFER_EMPTY)
321 ctx->compression_state = COMPRESSION_FIRST_DATA_RECEIVED;
323 ctx->compression_stream->avail_in = count;
324 ctx->compression_stream->next_in = buffer;
325 // ap_log_hex(APLOG_MARK, APLOG_DEBUG, APR_SUCCESS, f->r->server, buffer, count);
326 rslt = write_compress_buffer(f, Z_NO_FLUSH);
327 ctx->tx_uncompressed_length += count;
328 return rslt;
332 * Write hash
334 static apr_status_t write_hash(ap_filter_t *f, unsigned char *buffer, long count)
336 request_rec *r = f->r;
337 crccache_ctx *ctx = f->ctx;
338 apr_status_t rslt;
340 rslt = flush_compress_buffer(f);
341 if (rslt != APR_SUCCESS)
343 return rslt;
346 if (ctx->debug_skip_writing)
347 return APR_SUCCESS;
349 unsigned bucket_size = count + 1;
350 ctx->tx_length += bucket_size;
351 ctx->tx_uncompressed_length += bucket_size;
352 char * buf = apr_palloc(r->pool, bucket_size);
354 buf[0] = ENCODING_HASH;
355 memcpy(&buf[1],buffer,count);
356 ap_log_error(APLOG_MARK, APLOG_DEBUG, APR_SUCCESS, r->server,"CRCCACHE-ENCODE HASH");
357 apr_bucket * b = apr_bucket_pool_create(buf, bucket_size, r->pool, f->c->bucket_alloc);
358 APR_BRIGADE_INSERT_TAIL(ctx->bb, b);
359 return APR_SUCCESS;
364 * Write a block reference
366 static apr_status_t write_block_reference(ap_filter_t *f, long result)
368 request_rec *r = f->r;
369 crccache_ctx *ctx = f->ctx;
370 apr_status_t rslt;
372 rslt = flush_compress_buffer(f);
373 if (rslt != APR_SUCCESS)
375 return rslt;
378 if (ctx->debug_skip_writing)
379 return APR_SUCCESS;
381 unsigned bucket_size = ENCODING_BLOCK_HEADER_SIZE;
382 ctx->tx_length += bucket_size;
383 ctx->tx_uncompressed_length += bucket_size;
384 char * buf = apr_palloc(r->pool, bucket_size);
386 buf[0] = ENCODING_BLOCK;
387 buf[1] = (unsigned char) ((-result)-1); // invert and get back to zero based
388 ap_log_error(APLOG_MARK, APLOG_DEBUG, APR_SUCCESS, r->server,"CRCCACHE-ENCODE block %d",buf[1]);
389 apr_bucket * b = apr_bucket_pool_create(buf, bucket_size, r->pool, f->c->bucket_alloc);
390 APR_BRIGADE_INSERT_TAIL(ctx->bb, b);
391 return APR_SUCCESS;
395 * Process one block of data: try to match it against the CRC, append
396 * the result to the ouput ring and remember the result (e.g. was
397 * it a block-match or was a literal processed)
399 static apr_status_t process_block(ap_filter_t *f)
401 request_rec *r = f->r;
402 crccache_ctx *ctx = f->ctx;
403 apr_status_t rslt = APR_SUCCESS;
405 // ap_log_error(APLOG_MARK, APLOG_DEBUG, APR_SUCCESS, r->server,"CRCCACHE-ENCODE invoking crc_read_block");
406 if (ctx->crcctx == NULL)
408 // This should never happen
409 ap_log_error(APLOG_MARK, APLOG_ERR, APR_SUCCESS, r->server,"CRCCACHE-ENCODE crcctx = null");
410 return APR_EGENERAL;
413 long rd_block_rslt;
414 size_t ndigested = crc_read_block(
415 ctx->crcctx,
416 &rd_block_rslt,
417 ctx->buffer+ctx->buffer_digest_getpos,
418 ctx->buffer_putpos-ctx->buffer_digest_getpos
420 ap_log_error_wrapper(APLOG_MARK, APLOG_DEBUG, APR_SUCCESS, r->server,
421 "CRCCACHE-ENCODE crc_read_block ndigested: %zu, result %ld", ndigested, rd_block_rslt);
424 // rd_block_rslt = 0: do nothing (it is a 'literal' block of exactly 'blocksize' bytes at the end of the buffer, it will have to be moved
425 // to the beginning of the moving window so that it can be written upon the next call to crc_read_block or crc_read_flush)
426 // rd_block_rslt > 0: send literal
427 // rd_block_rslt < 0: send block
428 if (rd_block_rslt > 0)
430 rslt = write_literal(f, ctx->buffer+ctx->buffer_read_getpos, rd_block_rslt);
431 ctx->buffer_read_getpos += rd_block_rslt;
433 else if (rd_block_rslt < 0)
435 rslt = write_block_reference(f, rd_block_rslt);
436 unsigned char blocknum = (unsigned char) ((-rd_block_rslt)-1);
437 ctx->buffer_read_getpos += (blocknum == ctx->block_count-1) ? ctx->tail_block_size : ctx->block_size;
440 // Update the context with the results
441 ctx->crc_read_block_result = rd_block_rslt;
442 ctx->crc_read_block_ndigested = ndigested;
443 ctx->buffer_digest_getpos += ndigested;
444 return rslt;
448 * Flush one block of data: get it from the crccontext, append
449 * the result to the ouput ring and remember the result (e.g. was
450 * it a block-match or was a literal processed)
452 static apr_status_t flush_block(ap_filter_t *f)
454 request_rec *r = f->r;
455 crccache_ctx *ctx = f->ctx;
456 apr_status_t rslt = APR_SUCCESS;
458 // ap_log_error(APLOG_MARK, APLOG_DEBUG, APR_SUCCESS, r->server,"CRCCACHE-ENCODE invoking crc_read_flush");
459 if (ctx->crcctx == NULL)
461 // This should never happen
462 ap_log_error(APLOG_MARK, APLOG_ERR, APR_SUCCESS, r->server,"CRCCACHE-ENCODE crcctx = null");
463 return APR_EGENERAL;
465 long rd_flush_rslt = crc_read_flush(ctx->crcctx);
466 ap_log_error(APLOG_MARK, APLOG_DEBUG, APR_SUCCESS, r->server,"CRCCACHE-ENCODE crc_read_flush result %ld", rd_flush_rslt);
468 // rd_flush_rslt = 0: do nothing
469 // rd_flush_rslt > 0: send literal that was already digested but not yet returned by read-block
470 // rd_flush_rslt < 0: send block that was already digested but not yet returned by read-block
471 if (rd_flush_rslt > 0)
473 rslt = write_literal(f, ctx->buffer+ctx->buffer_read_getpos, rd_flush_rslt);
474 ctx->buffer_read_getpos += rd_flush_rslt;
476 else if (rd_flush_rslt < 0)
478 rslt = write_block_reference(f, rd_flush_rslt);
479 unsigned char blocknum = (unsigned char) ((-rd_flush_rslt)-1);
480 ctx->buffer_read_getpos += (blocknum == ctx->block_count-1) ? ctx->tail_block_size : ctx->block_size;
483 // Update the context with the results
484 ctx->crc_read_block_result = rd_flush_rslt;
485 ctx->crc_read_block_ndigested = 0;
486 return rslt;
490 * Clean-up memory used by helper libraries, that don't know about apr_palloc
491 * and that (probably) use classical malloc/free
493 static apr_status_t deflate_ctx_cleanup(void *data)
495 crccache_ctx *ctx = (crccache_ctx *)data;
497 if (ctx != NULL)
499 if (ctx->compression_state != COMPRESSION_ENDED)
501 deflateEnd(ctx->compression_stream);
502 ctx->compression_state = COMPRESSION_ENDED;
504 if (ctx->crcctx != NULL)
506 crc_context_free(ctx->crcctx);
507 ctx->crcctx = NULL;
510 return APR_SUCCESS;
513 * End of stream has been reached:
514 * Process any data still in the buffer and flush all internal
515 * structures of crcsync and of zlib
516 * Furthermore, add a strong hash
518 static apr_status_t process_eos(ap_filter_t *f)
520 crccache_ctx *ctx = f->ctx;
521 apr_status_t rslt;
523 ap_log_error(APLOG_MARK, APLOG_DEBUG, APR_SUCCESS, f->r->server,"CRCCACHE-ENCODE EOS reached for APR bucket");
526 while (ctx->buffer_digest_getpos < ctx->buffer_putpos)
528 // There is still data in the buffer. Process it.
529 rslt = process_block(f);
530 if (rslt != APR_SUCCESS)
532 return rslt;
538 // Flush remaining block in the crcctx
539 rslt = flush_block(f);
540 if (rslt != APR_SUCCESS)
542 return rslt;
545 while (ctx->crc_read_block_result != 0);
547 // Flush anything that is remaining in the compress buffer
548 rslt = flush_compress_buffer(f);
549 if (rslt != APR_SUCCESS)
551 return rslt;
554 unsigned md_len;
555 unsigned char md_value[EVP_MAX_MD_SIZE];
556 EVP_DigestFinal_ex(&ctx->mdctx, md_value, &md_len);
557 EVP_MD_CTX_cleanup(&ctx->mdctx);
558 write_hash(f, md_value, md_len);
560 ap_log_error_wrapper(APLOG_MARK, APLOG_DEBUG, APR_SUCCESS, f->r->server,
561 "CRCCACHE-ENCODE complete size %f%% (encoded-uncompressed=%zu encoded=%zu original=%zu",100.0*((float)ctx->tx_length/(float)ctx->orig_length),ctx->tx_uncompressed_length, ctx->tx_length, ctx->orig_length);
563 return APR_SUCCESS;
567 * Process a data bucket; append data into a moving window buffer
568 * and encode it with crcsync algorithm when window contains enough
569 * data for crcsync to find potential matches
571 static apr_status_t process_data_bucket(ap_filter_t *f, apr_bucket *e)
573 request_rec *r = f->r;
574 crccache_ctx *ctx = f->ctx;
576 const char *data;
577 apr_size_t len;
578 apr_status_t rslt;
580 /* read */
581 apr_bucket_read(e, &data, &len, APR_BLOCK_READ);
582 ctx->orig_length += len;
583 // update our sha1 hash
584 EVP_DigestUpdate(&ctx->mdctx, data, len);
585 // ap_log_error(APLOG_MARK, APLOG_DEBUG, APR_SUCCESS, r->server,"CRCCACHE-ENCODE normal data in APR bucket, read %ld", len);
587 // append data to the buffer and encode buffer content using the crc_read_block magic
588 size_t bucket_used_count = 0;
589 size_t bucket_data_left;
590 while(bucket_used_count < len)
592 /* Append as much data as possible into the buffer */
593 bucket_data_left = len - bucket_used_count;
594 size_t copy_size = MIN(ctx->buffer_size-ctx->buffer_putpos, bucket_data_left);
595 memcpy(ctx->buffer+ctx->buffer_putpos, data+bucket_used_count, copy_size);
596 bucket_used_count += copy_size;
597 bucket_data_left -= copy_size;
598 ctx->buffer_putpos += copy_size;
599 /* flush the buffer if it is appropriate */
600 if (ctx->buffer_putpos == ctx->buffer_size)
602 // Buffer is filled to the end. Flush as much as possible
603 ap_log_error_wrapper(APLOG_MARK, APLOG_DEBUG, APR_SUCCESS, r->server,
604 "CRCCACHE-ENCODE Buffer is filled to end, read_getpos: %zu, digest_getpos: %zu, putpos: %zu, putpos-digest_getpos: %zu (blocksize: %zu)",
605 ctx->buffer_read_getpos, ctx->buffer_digest_getpos, ctx->buffer_putpos, ctx->buffer_putpos-ctx->buffer_digest_getpos, ctx->block_size);
606 while (ctx->buffer_putpos - ctx->buffer_digest_getpos > ctx->block_size)
608 // We can still scan at least 1 block + 1 byte forward: try to flush next part
609 rslt = process_block(f);
610 if (rslt != APR_SUCCESS)
612 return rslt;
614 ap_log_error_wrapper(APLOG_MARK, APLOG_DEBUG, APR_SUCCESS, r->server,
615 "CRCCACHE-ENCODE Processed a block, read_getpos: %zu, digest_getpos: %zu, putpos: %zu, putpos-digest_getpos: %zu (blocksize: %zu)",
616 ctx->buffer_read_getpos, ctx->buffer_digest_getpos, ctx->buffer_putpos, ctx->buffer_putpos-ctx->buffer_digest_getpos, ctx->block_size);
619 if (ctx->buffer_putpos != ctx->buffer_read_getpos)
621 // Copy the remaining part of the buffer to the start of the buffer,
622 // so that it can be filled again as new data arrive
623 ap_log_error_wrapper(APLOG_MARK, APLOG_DEBUG, APR_SUCCESS, r->server,
624 "CRCCACHE-ENCODE Moving %zu bytes to begin of buffer",
625 ctx->buffer_putpos - ctx->buffer_read_getpos);
626 memcpy(ctx->buffer, ctx->buffer + ctx->buffer_read_getpos, ctx->buffer_putpos - ctx->buffer_read_getpos);
628 // Reset getpos to the beginning of the buffer and putpos accordingly
629 ctx->buffer_putpos -= ctx->buffer_read_getpos;
630 ctx->buffer_digest_getpos -= ctx->buffer_read_getpos;
631 ctx->buffer_read_getpos = 0;
633 while (ctx->crc_read_block_result < 0 && ctx->buffer_putpos - ctx->buffer_digest_getpos > ctx->block_size)
635 // Previous block matched exactly. Let's hope the next block as well
636 ap_log_error_wrapper(APLOG_MARK, APLOG_DEBUG, APR_SUCCESS, r->server,
637 "CRCCACHE-ENCODE Previous block matched, read_getpos: %zu, digest_getpos: %zu, putpos: %zu, putpos-digest_getpos: %zu (blocksize: %zu)",
638 ctx->buffer_read_getpos, ctx->buffer_digest_getpos, ctx->buffer_putpos, ctx->buffer_putpos-ctx->buffer_digest_getpos, ctx->block_size);
639 rslt = process_block(f);
640 if (rslt != APR_SUCCESS)
642 return rslt;
646 return APR_SUCCESS; // Yahoo, all went well
650 * CACHE_OUT filter
651 * ----------------
653 * Deliver cached content (headers and body) up the stack.
655 static apr_status_t crccache_out_filter(ap_filter_t *f, apr_bucket_brigade *bb) {
656 apr_bucket *e;
657 request_rec *r = f->r;
658 crccache_ctx *ctx = f->ctx;
659 int zRC;
660 int return_code = APR_SUCCESS;
662 /* Do nothing if asked to filter nothing. */
663 if (APR_BRIGADE_EMPTY(bb)) {
664 ap_log_error(APLOG_MARK, APLOG_DEBUG, APR_SUCCESS, r->server,"CRCCACHE-ENCODE bucket brigade is empty -> nothing todo");
665 return ap_pass_brigade(f->next, bb);
668 /* If we don't have a context, we need to ensure that it is okay to send
669 * the deflated content. If we have a context, that means we've done
670 * this before and we liked it.
671 * This could be not so nice if we always fail. But, if we succeed,
672 * we're in better shape.
674 if (ctx == NULL)
676 const char *encoding;
678 /* only work on main request/no subrequests */
679 if (r->main != NULL) {
680 ap_remove_output_filter(f);
681 return ap_pass_brigade(f->next, bb);
684 /* We can't operate on Content-Ranges */
685 if (apr_table_get(r->headers_out, "Content-Range") != NULL) {
686 ap_remove_output_filter(f);
687 return ap_pass_brigade(f->next, bb);
690 /* Let's see what our current Content-Encoding is.
691 * If it's already encoded by crccache: don't compress again.
692 * (We could, but let's not.)
694 encoding = apr_table_get(r->headers_out, ENCODING_HEADER);
695 if (encoding && strcasecmp(CRCCACHE_ENCODING,encoding) == 0)
697 /* Even if we don't accept this request based on it not having
698 * the Accept-Encoding, we need to note that we were looking
699 * for this header and downstream proxies should be aware of that.
701 apr_table_mergen(r->headers_out, "Vary", "A-IM");
702 ap_remove_output_filter(f);
703 return ap_pass_brigade(f->next, bb);
706 /* For a 304 or 204 response there is no entity included in
707 * the response and hence nothing to deflate. */
708 if (r->status == HTTP_NOT_MODIFIED || r->status ==HTTP_NO_CONTENT)
710 ap_remove_output_filter(f);
711 return ap_pass_brigade(f->next, bb);
714 /* All Ok. We're cool with filtering this. */
715 ctx = f->ctx = apr_pcalloc(r->pool, sizeof(*ctx));
716 ctx->debug_skip_writing = 0;
717 ctx->orig_length = 0;
718 ctx->tx_length = 0;
719 ctx->tx_uncompressed_length = 0;
720 ctx->bb = apr_brigade_create(r->pool, f->c->bucket_alloc);
722 /* If Content-Encoding present and differs from "identity", we can't handle it */
723 if (encoding && strcasecmp(encoding, "identity")) {
724 ap_log_error(APLOG_MARK, APLOG_INFO, APR_SUCCESS, r->server,
725 "Not encoding with crccache. It is already encoded with: %s", encoding);
726 ap_remove_output_filter(f);
727 return ap_pass_brigade(f->next, bb);
730 /* Parse the input headers */
731 const char * header;
732 header = apr_table_get(r->headers_in, BLOCK_HEADER);
733 int version;
734 size_t file_size;
735 char * hashes;
736 if (decode_if_block_header(header,&version,&file_size,&hashes) < 0)
738 ap_log_error(APLOG_MARK, APLOG_ERR, 0, r->server,"crccache: failed to decode if-block header");
739 ap_remove_output_filter(f);
740 return ap_pass_brigade(f->next, bb);
742 // Decode the hashes
743 ctx->block_count = apr_base64_decode_len(hashes)/(HASH_SIZE/8);
744 // this may over allocate by a couple of bytes but no big deal
745 ctx->hashes = apr_palloc(r->pool, apr_base64_decode_len(hashes));
746 apr_base64_decode((char *)ctx->hashes, hashes);
747 free(hashes);
748 hashes = NULL;
750 ctx->block_size = file_size/ctx->block_count;
751 ctx->tail_block_size = ctx->block_size + file_size % ctx->block_count;
752 size_t block_count_including_final_block = ctx->block_count;// + (ctx->tail_block_size != 0);
753 ap_log_error(APLOG_MARK, APLOG_INFO, APR_SUCCESS, r->server,
754 "If-block header decoded, version %d: %d hashes of %d and one of %d", version, ctx->block_count-1,(int)ctx->block_size,(int)ctx->tail_block_size);
756 // swap to network byte order
757 int i;
758 for (i = 0; i < block_count_including_final_block;++i)
760 htobe64(ctx->hashes[i]);
763 // Data come in at chunks that are potentially smaller then block_size
764 // Accumulate those chunks into a buffer.
765 // The buffer must be at least 2*block_size so that crc_read_block(...) can find a matching block, regardless
766 // of the data alignment compared to the original page.
767 // The buffer is basically a moving window in the new page. So sometimes the last part of the buffer must be
768 // copied to the beginning again. The larger the buffer, the less often such a copy operation is required
769 // Though, the larger the buffer, the bigger the memory demand.
770 // A size of 4*block_size (20% of original file size) seems to be a good balance
772 // TODO: tune the buffer-size depending on the mime-type. Already compressed data (zip, gif, jpg, mpg, etc) will
773 // probably only have matching blocks if the file is totally unmodified. As soon as one byte differs in the original
774 // uncompressed data, the entire compressed data stream will be different anyway, so in such case it does not make
775 // much sense to even keep invoking the crc_read_block(...) function as soon as a difference has been found.
776 // Hence, no need to make a (potentially huge) buffer for these type of compressed (potentially huge, think about movies)
777 // data types.
778 ctx->buffer_size = ctx->block_size*4 + 1;
779 ctx->buffer_digest_getpos = 0;
780 ctx->buffer_read_getpos = 0;
781 ctx->buffer_putpos = 0;
782 ctx->crc_read_block_result = 0;
783 ctx->buffer = apr_palloc(r->pool, ctx->buffer_size);
787 /* Setup deflate for compressing non-matched literal data */
788 ctx->compression_state = COMPRESSION_BUFFER_EMPTY;
789 // TODO: should I pass some apr_palloc based function to prevent memory leaks
790 //in case of unexpected errors?
792 ap_log_error_wrapper(APLOG_MARK, APLOG_DEBUG, APR_SUCCESS, r->server,"CRCCACHE-ENCODE size of compression stream: %zd",sizeof(*(ctx->compression_stream)));
793 ctx->compression_stream = apr_palloc(r->pool, sizeof(*(ctx->compression_stream)));
794 ctx->compression_stream->zalloc = Z_NULL;
795 ctx->compression_stream->zfree = Z_NULL;
796 ctx->compression_stream->opaque = Z_NULL;
797 zRC = deflateInit(ctx->compression_stream, Z_DEFAULT_COMPRESSION); // TODO: make compression level configurable
798 if (zRC != Z_OK)
800 // Can't initialize the compression engine for compressing literal data
801 deflateEnd(ctx->compression_stream); // free memory used by deflate
802 free(ctx->compression_stream);
803 ctx->compression_stream = NULL;
804 ap_log_rerror(APLOG_MARK, APLOG_ERR, 0, r,
805 "unable to init Zlib: "
806 "deflateInit returned %d: URL %s",
807 zRC, r->uri);
808 ap_remove_output_filter(f);
809 return ap_pass_brigade(f->next, bb);
812 // initialise the context for our sha1 digest of the unencoded response
813 EVP_MD_CTX_init(&ctx->mdctx);
814 const EVP_MD *md = EVP_sha1();
815 EVP_DigestInit_ex(&ctx->mdctx, md, NULL);
817 // now initialise the crcsync context that will do the real work
818 ctx->crcctx = crc_context_new(ctx->block_size, HASH_SIZE,ctx->hashes, block_count_including_final_block, ctx->tail_block_size);
820 // Register a cleanup function to cleanup internal libz and crcsync resources
821 apr_pool_cleanup_register(r->pool, ctx, deflate_ctx_cleanup,
822 apr_pool_cleanup_null);
824 // All checks and initializations are OK
825 // Modify headers that are impacted by this transformation
826 // TODO: the crccache-client could recalculate these headers once it has
827 // reconstructed the page, before handling the reconstructed page
828 // back to the client
829 apr_table_setn(r->headers_out, ENCODING_HEADER, CRCCACHE_ENCODING);
830 apr_table_addn(r->headers_out, VARY_HEADER, VARY_VALUE);
831 apr_table_unset(r->headers_out, "Content-Length");
832 apr_table_unset(r->headers_out, "Content-MD5");
833 crccache_check_etag(r, CRCCACHE_ENCODING);
835 // All is okay, so set response header to IM Used
836 ap_log_error(APLOG_MARK, APLOG_DEBUG, APR_SUCCESS, r->server, "CRCCACHE Server end of context setup");
837 //r->status=226;
838 //r->status_line="226 IM Used";
841 while (!APR_BRIGADE_EMPTY(bb))
843 const char *data;
844 apr_size_t len;
845 apr_status_t rslt;
847 e = APR_BRIGADE_FIRST(bb);
849 if (APR_BUCKET_IS_EOS(e))
851 // Process end of stream: flush data buffers, compression buffers, etc.
852 // and calculate a strong hash.
853 rslt = process_eos(f);
855 /* Remove EOS from the old list, and insert into the new. */
856 APR_BUCKET_REMOVE(e);
857 APR_BRIGADE_INSERT_TAIL(ctx->bb, e);
859 /* This filter is done once it has served up its content */
860 ap_remove_output_filter(f);
862 if (rslt != APR_SUCCESS)
864 return rslt; // A problem occurred. Abort the processing
867 /* Okay, we've seen the EOS.
868 * Time to pass it along down the chain.
870 return ap_pass_brigade(f->next, ctx->bb);
873 if (APR_BUCKET_IS_FLUSH(e))
875 // ap_log_error(APLOG_MARK, APLOG_DEBUG, APR_SUCCESS, r->server,"CRCCACHE-ENCODE flush APR bucket");
876 apr_status_t rv;
878 /* Remove flush bucket from old brigade and insert into the new. */
879 APR_BUCKET_REMOVE(e);
880 // TODO: optimize; do not insert two consecutive flushes when no intermediate
881 // output block was written
882 APR_BRIGADE_INSERT_TAIL(ctx->bb, e);
883 rv = ap_pass_brigade(f->next, ctx->bb);
884 if (rv != APR_SUCCESS) {
885 return rv;
887 continue;
890 if (APR_BUCKET_IS_METADATA(e)) {
891 // ap_log_error(APLOG_MARK, APLOG_DEBUG, APR_SUCCESS, r->server,"CRCCACHE-ENCODE metadata APR bucket");
893 * Remove meta data bucket from old brigade and insert into the
894 * new.
896 apr_bucket_read(e, &data, &len, APR_BLOCK_READ);
897 if (len > 2)
898 ap_log_error_wrapper(APLOG_MARK, APLOG_DEBUG, APR_SUCCESS, r->server,
899 "CRCCACHE-ENCODE Metadata, read %zu, %d %d %d",len,data[0],data[1],data[2]);
900 else
901 ap_log_error_wrapper(APLOG_MARK, APLOG_DEBUG, APR_SUCCESS, r->server,
902 "CRCCACHE-ENCODE Metadata, read %zu",len);
903 APR_BUCKET_REMOVE(e);
904 APR_BRIGADE_INSERT_TAIL(ctx->bb, e);
905 continue;
908 // Bucket is non of the above types. Assume it is a data bucket
909 // which means it can be encoded with the crcsync algorithm
910 rslt = process_data_bucket(f, e);
912 APR_BUCKET_REMOVE(e);
913 if (rslt != APR_SUCCESS)
915 break; // A problem occurred. Abort the processing
919 apr_brigade_cleanup(bb);
920 return return_code;
923 static void crccache_server_register_hook(apr_pool_t *p) {
924 ap_log_error(APLOG_MARK, APLOG_INFO, 0, NULL,
925 "Registering crccache server module, (C) 2009, Toby Collett and Alex Wulms");
927 ap_hook_header_parser(crccache_server_header_parser_handler, NULL, NULL,
928 APR_HOOK_MIDDLE);
930 ap_register_output_filter("CRCCACHE_HEADER", crccache_server_header_filter_handler,
931 NULL, AP_FTYPE_PROTOCOL);
933 crccache_out_filter_handle = ap_register_output_filter("CRCCACHE_OUT",
934 crccache_out_filter, NULL, AP_FTYPE_CONTENT_SET);
937 module AP_MODULE_DECLARE_DATA crccache_server_module = {
938 STANDARD20_MODULE_STUFF, NULL, /* create per-directory config structure */
939 NULL , /* merge per-directory config structures */
940 crccache_server_create_config, /* create per-server config structure */
941 NULL , /* merge per-server config structures */
942 crccache_server_cmds, /* command apr_table_t */
943 crccache_server_register_hook /* register hooks */