Append crcsync-<original-encoding> to etag and some enhancements and bug fixes
[httpd-crcsyncproxy.git] / crccache / mod_crccache_server.c
blob882108f67b1e26607eb3aa916dede2a094e1a121
1 /* Licensed to the Apache Software Foundation (ASF) under one or more
2 * contributor license agreements. See the NOTICE file distributed with
3 * this work for additional information regarding copyright ownership.
4 * The ASF licenses this file to You under the Apache License, Version 2.0
5 * (the "License"); you may not use this file except in compliance with
6 * the License. You may obtain a copy of the License at
8 * http://www.apache.org/licenses/LICENSE-2.0
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
17 /* crcsync/crccache apache server module
19 * This module is designed to run as a proxy server on the remote end of a slow
20 * internet link. This module uses a crc32 running hash algorithm to reduce
21 * data transfer in cached but modified downstream files.
23 * CRC algorithm uses the crcsync library created by Rusty Russell
25 * Authors: Toby Collett (2009), Alex Wulms (2009)
29 #include <stdbool.h>
31 #include <apr_file_io.h>
32 #include <apr_strings.h>
33 #include <apr_base64.h>
35 #include "ap_provider.h"
37 #include "util_filter.h"
38 #include "util_script.h"
39 #include "util_charset.h"
41 #include <http_log.h>
42 #include "ap_wrapper.h"
44 #include "crccache.h"
45 #include "mod_crccache_server.h"
47 #include <crcsync/crcsync.h>
48 #include <zlib.h>
49 #include <openssl/evp.h>
51 module AP_MODULE_DECLARE_DATA crccache_server_module;
53 // Possible states for the output compression
54 typedef enum {
55 COMPRESSION_BUFFER_EMPTY,
56 COMPRESSION_FIRST_DATA_RECEIVED,
57 COMPRESSION_FIRST_BLOCK_WRITTEN,
58 COMPRESSION_ENDED
59 } compression_state_t;
61 static void *crccache_server_create_config(apr_pool_t *p, server_rec *s) {
62 crccache_server_conf *conf = apr_pcalloc(p, sizeof(crccache_server_conf));
63 conf->enabled = 0;
64 conf->decoder_modules = NULL;
65 conf->decoder_modules_cnt = 0;
66 return conf;
69 typedef enum { GS_INIT, GS_HEADERS_SAVED, GS_ENCODING } global_state_t;
71 typedef struct crccache_ctx_t {
72 global_state_t global_state;
73 char *old_content_encoding;
74 char *old_etag;
75 unsigned char *buffer;
76 size_t buffer_digest_getpos;
77 size_t buffer_read_getpos;
78 size_t buffer_putpos;
79 size_t buffer_size;
80 long crc_read_block_result;
81 size_t crc_read_block_ndigested;
82 apr_bucket_brigade *bb;
83 unsigned block_count;
84 size_t block_size;
85 size_t tail_block_size;
86 uint64_t *hashes;
87 struct crc_context *crcctx;
88 size_t orig_length;
89 size_t tx_length;
90 size_t tx_uncompressed_length;
91 compression_state_t compression_state;
92 z_stream *compression_stream;
93 EVP_MD_CTX mdctx;
94 int debug_skip_writing; // ____
95 } crccache_ctx;
99 * Only enable CRCCache Server when requested through the config file
100 * so that the user can switch CRCCache server on in a specific virtual server
102 static const char *set_crccache_server(cmd_parms *parms, void *dummy, int flag)
104 crccache_server_conf *conf = ap_get_module_config(parms->server->module_config,
105 &crccache_server_module);
106 conf->enabled = flag;
107 return NULL;
110 static const char *set_crccache_decoder_module(cmd_parms *parms, void *in_struct_ptr, const char *arg)
112 crccache_server_conf *conf = ap_get_module_config(parms->server->module_config,
113 &crccache_server_module);
114 struct decoder_modules_t *decoder_module = malloc(sizeof(*decoder_module));
115 if (decoder_module == NULL)
117 return "Out of memory exception while allocating decoder_module structure";
119 char *tok;
120 char *last = NULL;
122 char *data = strdup(arg);
123 if (data == NULL)
125 return "Out of memory exception while parsing DecoderModule parameter";
128 tok = apr_strtok(data, ": ", &last);
129 if (tok == NULL)
131 return "DecoderModule value must be of format: filtername:encoding[,encoding]*";
134 decoder_module->name = strdup(tok);
135 if (decoder_module->name == NULL)
137 return "Out of memory exception while storing name in decoder_module structure";
140 tok = apr_strtok(NULL, ": ", &last);
141 if (tok == NULL)
143 return "DecoderModule value must be of format: filtername:encoding[,encoding]*";
146 for (tok = apr_strtok(tok, ", ", &last); tok != NULL; tok = apr_strtok(NULL, ", ", &last))
148 struct encodings_t *encoding = malloc(sizeof(*encoding));
149 if (encoding == NULL)
151 return "Out of memory exception while allocating encoding structure";
154 encoding->encoding = strdup(tok);
155 if (encoding->encoding == NULL)
157 return "Out of memory exception while storing encoding value in encoding structure";
160 // Insert new encoding to the head of the encodings list
161 encoding->next = decoder_module->encodings;
162 decoder_module->encodings = encoding;
165 // Insert (new) decoder module to the head of the decoder_modules list
166 decoder_module->next = conf->decoder_modules;
167 conf->decoder_modules = decoder_module;
168 conf->decoder_modules_cnt++;
170 return NULL;
173 static const command_rec crccache_server_cmds[] =
175 AP_INIT_FLAG("CRCcacheServer", set_crccache_server, NULL, RSRC_CONF, "Enable the CRCCache server in this virtual server"),
176 AP_INIT_TAKE1("DecoderModule", set_crccache_decoder_module, NULL, RSRC_CONF, "DecoderModules to decode content-types (e.g. INFLATE:gzip,x-gzip)"),
177 { NULL }
180 static ap_filter_rec_t *crccache_out_filter_handle;
181 static ap_filter_rec_t *crccache_out_save_headers_filter_handle;
184 int decode_if_block_header(const char * header, int * version, size_t * file_size, char ** hashes)
186 *version = 1;
187 *file_size = 0;
188 *hashes = NULL; // this will be allocated below, make sure we free it
189 int start = 0;
190 int ii;
191 size_t headerlen = strlen(header);
192 for (ii = 0; ii < headerlen;++ii)
194 if (header[ii] == ',' || ii == headerlen-1)
196 sscanf(&header[start]," v=%d",version);
197 sscanf(&header[start]," h=%as",hashes);
198 sscanf(&header[start]," fs=%zu",file_size);
199 start = ii + 1;
203 if (*hashes == NULL)
205 ap_log_error(APLOG_MARK, APLOG_ERR, 0, NULL, "CRCCACHE-ENCODE no hashes reported in header");
206 return -1;
208 if (*version != 1)
210 ap_log_error(APLOG_MARK, APLOG_ERR, 0, NULL, "CRCCACHE-ENCODE Unsupported header version, %d",*version);
211 free(*hashes);
212 *hashes = NULL;
213 return -1;
215 if (*file_size == 0)
217 ap_log_error(APLOG_MARK, APLOG_ERR, 0, NULL, "CRCCACHE-ENCODE no file size reported in header");
218 free(*hashes);
219 *hashes = NULL;
220 return -1;
222 return 0;
225 static int crccache_server_header_parser_handler(request_rec *r) {
226 crccache_server_conf *conf = ap_get_module_config(r->server->module_config,
227 &crccache_server_module);
228 if (conf->enabled)
230 ap_log_error(APLOG_MARK, APLOG_DEBUG, 0, r->server, "CRCCACHE-ENCODE Checking for headers");
231 const char * header;
232 header = apr_table_get(r->headers_in, BLOCK_HEADER);
233 if (header)
235 int version;
236 size_t file_size;
237 char * hashes;
238 if (decode_if_block_header(header,&version,&file_size,&hashes) < 0)
240 // failed to decode if block header so just process request normally
241 return OK;
243 ap_log_error(APLOG_MARK, APLOG_DEBUG, 0, r->server, "CRCCACHE-ENCODE Block Hashes header found (hashes: %s)",hashes);
244 free (hashes);
245 hashes = NULL;
247 crccache_ctx *ctx = apr_pcalloc(r->pool, sizeof(*ctx));
248 ctx->global_state = GS_INIT;
249 ctx->old_content_encoding = NULL;
250 ctx->old_etag = NULL;
252 // Add the filter to save the headers, so that they can be restored after an optional INFLATE or other decoder module
253 ap_add_output_filter_handle(crccache_out_save_headers_filter_handle,
254 ctx, r, r->connection);
256 char *accept_encoding = apr_pstrdup(r->pool, apr_table_get(r->headers_in, ACCEPT_ENCODING_HEADER));
257 ap_log_error(APLOG_MARK, APLOG_DEBUG, 0, r->server, "CRCCACHE-ENCODE Incoming Accept-Encoding header: %s", accept_encoding == NULL ? "NULL" : accept_encoding);
258 if (accept_encoding != NULL)
260 struct decoder_modules_t *required_dms[conf->decoder_modules_cnt];
261 unsigned required_dms_size = 0;
262 char *tok;
263 char *last = NULL;
264 struct decoder_modules_t *dm;
265 struct encodings_t *enc;
266 unsigned cnt;
267 // Build the list of filter modules to handle the requested encodings and
268 // remove all non-supported encodings from the header
269 apr_table_unset(r->headers_in, ACCEPT_ENCODING_HEADER);
270 for (tok = apr_strtok(accept_encoding, ", ", &last); tok != NULL; tok = apr_strtok(NULL, ", ", &last)) {
271 for (dm = conf->decoder_modules; dm != NULL; dm = dm->next) {
272 for (enc = dm->encodings; enc != NULL; enc = enc->next) {
273 if (strcmp(tok, enc->encoding) == 0)
275 // This module supports the requested encoding
276 // Add it to the list if it is not already present
277 for (cnt = 0; cnt != required_dms_size; cnt++)
279 if (required_dms[cnt] == dm)
280 break; // module is already inserted in list
282 if (cnt == required_dms_size)
284 required_dms[required_dms_size++] = dm;
286 apr_table_mergen(r->headers_in, ACCEPT_ENCODING_HEADER, tok);
291 // Enable the requested filter modules
292 for (cnt = 0; cnt != required_dms_size; cnt++) {
293 dm = required_dms[cnt];
294 ap_filter_t *filter = ap_add_output_filter(dm->name, NULL, r, r->connection);
295 if (filter == NULL) {
296 ap_log_error(APLOG_MARK, APLOG_WARNING, APR_SUCCESS, r->server, "CRCCACHE-ENCODE Could not enable %s filter", dm->name);
297 // Remove the encodings handled by this filter from the list of accepted encodings
298 accept_encoding = apr_pstrdup(r->pool, apr_table_get(r->headers_in, ACCEPT_ENCODING_HEADER));
299 apr_table_unset(r->headers_in, ACCEPT_ENCODING_HEADER);
300 for (tok = apr_strtok(accept_encoding, ", ", &last); tok != NULL; tok = apr_strtok(NULL, ", ", &last)) {
301 for (enc = dm->encodings; enc != NULL; enc = enc->next) {
302 if (strcmp(tok, enc->encoding)==0) {
303 ap_log_error(APLOG_MARK, APLOG_WARNING, APR_SUCCESS, r->server, "CRCCACHE-ENCODE Removing encoding %s", tok);
304 break;
307 if (enc == NULL) {
308 // Did not find the tok encoding in the list. It can be merged back into the header
309 apr_table_mergen(r->headers_in, ACCEPT_ENCODING_HEADER, tok);
313 else
315 ap_log_error(APLOG_MARK, APLOG_DEBUG, APR_SUCCESS, r->server, "CRCCACHE-ENCODE Successfully enabled %s filter", dm->name);
318 const char *updated_accept_encoding = apr_table_get(r->headers_in, ACCEPT_ENCODING_HEADER);
319 ap_log_error(APLOG_MARK, APLOG_DEBUG, 0, r->server, "CRCCACHE-ENCODE Modified Accept-Encoding header: %s", updated_accept_encoding == NULL ? "NULL" : updated_accept_encoding);
321 // Add the crccache filter itself, after the decoder modules
322 ap_add_output_filter_handle(crccache_out_filter_handle,
323 ctx, r, r->connection);
325 else
327 ap_log_error(APLOG_MARK, APLOG_DEBUG, 0, r->server, "CRCCACHE-ENCODE Did not detect blockheader (%s)", BLOCK_HEADER);
330 /* // All is okay, so set response header to IM Used
331 ap_log_error(APLOG_MARK, APLOG_DEBUG, APR_SUCCESS, r->server, "CRCCACHE-ENCODE Setting 226 header");
332 r->status=226;
333 r->status_line="226 IM Used";
334 return 226;*/
336 return OK;
339 /*static int crccache_server_header_filter_handler(ap_filter_t *f, apr_bucket_brigade *b) {
340 //request_rec *r)
341 request_rec *r = f->r;
343 ap_log_error(APLOG_MARK, APLOG_DEBUG, APR_SUCCESS, r->server,"CRCCACHE-ENCODE Setting return status code");
345 // All is okay, so set response header to IM Used
346 r->status=226;
347 r->status_line="HTTP/1.1 226 IM Used";
348 return 226;
351 static void crccache_check_etag(request_rec *r, crccache_ctx *ctx, const char *transform) {
352 const char *etag = ctx->old_etag;
353 if (etag) {
354 apr_table_set(r->headers_out, ETAG_HEADER,
355 apr_pstrcat(
356 r->pool,
357 etag, "-",
358 transform, "-",
359 ctx->old_content_encoding == NULL ? "identity" : ctx->old_content_encoding,
360 NULL
363 ap_log_error(APLOG_MARK, APLOG_DEBUG, APR_SUCCESS, r->server, "CRCCACHE-ENCODE Changed ETag header to %s", apr_table_get(r->headers_out, ETAG_HEADER));
367 static apr_status_t write_compress_buffer(ap_filter_t *f, int flush)
369 unsigned char compress_buf[30000];
370 request_rec *r = f->r;
371 crccache_ctx *ctx = f->ctx;
372 z_stream *strm = ctx->compression_stream;
374 if (ctx->debug_skip_writing)
375 return APR_SUCCESS;
379 strm->avail_out = sizeof(compress_buf);
380 strm->next_out = compress_buf;
381 uInt avail_in_pre_deflate = strm->avail_in;
382 int zRC = deflate(strm, flush);
383 if (zRC == Z_STREAM_ERROR)
385 ap_log_error(APLOG_MARK, APLOG_ERR, APR_EGENERAL, r->server,"CRCCACHE-ENCODE deflate error: %d", zRC);
386 return APR_EGENERAL;
388 int have = sizeof(compress_buf) - strm->avail_out;
389 ap_log_error(APLOG_MARK, APLOG_DEBUG, APR_SUCCESS, r->server,
390 "CRCCACHE-ENCODE deflate rslt %d, flush %d, consumed %d, produced %d",
391 zRC, flush, avail_in_pre_deflate - strm->avail_in, have);
392 if (have != 0)
394 // output buffer contains some data to be written
395 // ap_log_hex(APLOG_MARK, APLOG_DEBUG, APR_SUCCESS, r->server, compress_buf, have);
396 unsigned bucket_size = have;
397 if (ctx->compression_state != COMPRESSION_FIRST_BLOCK_WRITTEN)
399 bucket_size += ENCODING_COMPRESSED_HEADER_SIZE;
401 ctx->tx_length += bucket_size;
402 char * buf = apr_palloc(r->pool, bucket_size);
404 if (ctx->compression_state != COMPRESSION_FIRST_BLOCK_WRITTEN)
406 buf[0] = ENCODING_COMPRESSED;
407 memcpy(buf + ENCODING_COMPRESSED_HEADER_SIZE, compress_buf, have);
408 ctx->compression_state = COMPRESSION_FIRST_BLOCK_WRITTEN;
410 else
412 memcpy(buf, compress_buf, have);
414 apr_bucket * b = apr_bucket_pool_create(buf, bucket_size, r->pool, f->c->bucket_alloc);
415 APR_BRIGADE_INSERT_TAIL(ctx->bb, b);
418 while (strm->avail_out == 0);
419 if (strm->avail_in != 0)
421 ap_log_error(APLOG_MARK, APLOG_ERR, APR_EGENERAL, r->server,"CRCCACHE-ENCODE deflate still has %d input bytes available", strm->avail_in);
422 return APR_EGENERAL;
425 return APR_SUCCESS;
429 static apr_status_t flush_compress_buffer(ap_filter_t *f)
431 crccache_ctx *ctx = f->ctx;
432 apr_status_t rslt = APR_SUCCESS; // assume all will be fine
434 if (ctx->debug_skip_writing)
435 return APR_SUCCESS;
437 if (ctx->compression_state != COMPRESSION_BUFFER_EMPTY)
439 rslt = write_compress_buffer(f, Z_FINISH); // take the real status
440 deflateReset(ctx->compression_stream);
441 ctx->compression_state = COMPRESSION_BUFFER_EMPTY;
442 // ____ ctx->debug_skip_writing = 1; // skip writing after handling first compressed block
444 return rslt;
448 * Write literal data
450 static apr_status_t write_literal(ap_filter_t *f, unsigned char *buffer, long count)
452 crccache_ctx *ctx = f->ctx;
454 if (ctx->debug_skip_writing)
455 return APR_SUCCESS;
457 apr_status_t rslt;
458 if (ctx->compression_state == COMPRESSION_BUFFER_EMPTY)
460 ctx->compression_state = COMPRESSION_FIRST_DATA_RECEIVED;
462 ctx->compression_stream->avail_in = count;
463 ctx->compression_stream->next_in = buffer;
464 // ap_log_hex(APLOG_MARK, APLOG_DEBUG, APR_SUCCESS, f->r->server, buffer, count);
465 rslt = write_compress_buffer(f, Z_NO_FLUSH);
466 ctx->tx_uncompressed_length += count;
467 return rslt;
471 * Write hash
473 static apr_status_t write_hash(ap_filter_t *f, unsigned char *buffer, long count)
475 request_rec *r = f->r;
476 crccache_ctx *ctx = f->ctx;
477 apr_status_t rslt;
479 rslt = flush_compress_buffer(f);
480 if (rslt != APR_SUCCESS)
482 return rslt;
485 if (ctx->debug_skip_writing)
486 return APR_SUCCESS;
488 unsigned bucket_size = count + 1;
489 ctx->tx_length += bucket_size;
490 ctx->tx_uncompressed_length += bucket_size;
491 char * buf = apr_palloc(r->pool, bucket_size);
493 buf[0] = ENCODING_HASH;
494 memcpy(&buf[1],buffer,count);
495 ap_log_error(APLOG_MARK, APLOG_DEBUG, APR_SUCCESS, r->server,"CRCCACHE-ENCODE HASH");
496 apr_bucket * b = apr_bucket_pool_create(buf, bucket_size, r->pool, f->c->bucket_alloc);
497 APR_BRIGADE_INSERT_TAIL(ctx->bb, b);
498 return APR_SUCCESS;
503 * Write a block reference
505 static apr_status_t write_block_reference(ap_filter_t *f, long result)
507 request_rec *r = f->r;
508 crccache_ctx *ctx = f->ctx;
509 apr_status_t rslt;
511 rslt = flush_compress_buffer(f);
512 if (rslt != APR_SUCCESS)
514 return rslt;
517 if (ctx->debug_skip_writing)
518 return APR_SUCCESS;
520 unsigned bucket_size = ENCODING_BLOCK_HEADER_SIZE;
521 ctx->tx_length += bucket_size;
522 ctx->tx_uncompressed_length += bucket_size;
523 char * buf = apr_palloc(r->pool, bucket_size);
525 buf[0] = ENCODING_BLOCK;
526 buf[1] = (unsigned char) ((-result)-1); // invert and get back to zero based
527 ap_log_error(APLOG_MARK, APLOG_DEBUG, APR_SUCCESS, r->server,"CRCCACHE-ENCODE block %d",buf[1]);
528 apr_bucket * b = apr_bucket_pool_create(buf, bucket_size, r->pool, f->c->bucket_alloc);
529 APR_BRIGADE_INSERT_TAIL(ctx->bb, b);
530 return APR_SUCCESS;
534 * Process one block of data: try to match it against the CRC, append
535 * the result to the ouput ring and remember the result (e.g. was
536 * it a block-match or was a literal processed)
538 static apr_status_t process_block(ap_filter_t *f)
540 request_rec *r = f->r;
541 crccache_ctx *ctx = f->ctx;
542 apr_status_t rslt = APR_SUCCESS;
544 // ap_log_error(APLOG_MARK, APLOG_DEBUG, APR_SUCCESS, r->server,"CRCCACHE-ENCODE invoking crc_read_block");
545 if (ctx->crcctx == NULL)
547 // This should never happen
548 ap_log_error(APLOG_MARK, APLOG_ERR, APR_SUCCESS, r->server,"CRCCACHE-ENCODE crcctx = null");
549 return APR_EGENERAL;
552 long rd_block_rslt;
553 size_t ndigested = crc_read_block(
554 ctx->crcctx,
555 &rd_block_rslt,
556 ctx->buffer+ctx->buffer_digest_getpos,
557 ctx->buffer_putpos-ctx->buffer_digest_getpos
559 ap_log_error_wrapper(APLOG_MARK, APLOG_DEBUG, APR_SUCCESS, r->server,
560 "CRCCACHE-ENCODE crc_read_block ndigested: %zu, result %ld", ndigested, rd_block_rslt);
563 // rd_block_rslt = 0: do nothing (it is a 'literal' block of exactly 'tail_blocksize' bytes at the end of the buffer,
564 // it will have to be moved to the beginning of the moving window so that it can be written upon the next call to
565 // crc_read_block or crc_read_flush)
566 // rd_block_rslt > 0: send literal
567 // rd_block_rslt < 0: send block
568 if (rd_block_rslt > 0)
570 rslt = write_literal(f, ctx->buffer+ctx->buffer_read_getpos, rd_block_rslt);
571 ctx->buffer_read_getpos += rd_block_rslt;
573 else if (rd_block_rslt < 0)
575 rslt = write_block_reference(f, rd_block_rslt);
576 unsigned char blocknum = (unsigned char) ((-rd_block_rslt)-1);
577 ctx->buffer_read_getpos += (blocknum == ctx->block_count-1) ? ctx->tail_block_size : ctx->block_size;
580 // Update the context with the results
581 ctx->crc_read_block_result = rd_block_rslt;
582 ctx->crc_read_block_ndigested = ndigested;
583 ctx->buffer_digest_getpos += ndigested;
584 return rslt;
588 * Flush one block of data: get it from the crccontext, append
589 * the result to the ouput ring and remember the result (e.g. was
590 * it a block-match or was a literal processed)
592 static apr_status_t flush_block(ap_filter_t *f)
594 request_rec *r = f->r;
595 crccache_ctx *ctx = f->ctx;
596 apr_status_t rslt = APR_SUCCESS;
598 // ap_log_error(APLOG_MARK, APLOG_DEBUG, APR_SUCCESS, r->server,"CRCCACHE-ENCODE invoking crc_read_flush");
599 if (ctx->crcctx == NULL)
601 // This should never happen
602 ap_log_error(APLOG_MARK, APLOG_ERR, APR_SUCCESS, r->server,"CRCCACHE-ENCODE crcctx = null");
603 return APR_EGENERAL;
605 long rd_flush_rslt = crc_read_flush(ctx->crcctx);
606 ap_log_error(APLOG_MARK, APLOG_DEBUG, APR_SUCCESS, r->server,"CRCCACHE-ENCODE crc_read_flush result %ld", rd_flush_rslt);
608 // rd_flush_rslt = 0: do nothing
609 // rd_flush_rslt > 0: send literal that was already digested but not yet returned by read-block
610 // rd_flush_rslt < 0: send block that was already digested but not yet returned by read-block
611 if (rd_flush_rslt > 0)
613 rslt = write_literal(f, ctx->buffer+ctx->buffer_read_getpos, rd_flush_rslt);
614 ctx->buffer_read_getpos += rd_flush_rslt;
616 else if (rd_flush_rslt < 0)
618 rslt = write_block_reference(f, rd_flush_rslt);
619 unsigned char blocknum = (unsigned char) ((-rd_flush_rslt)-1);
620 ctx->buffer_read_getpos += (blocknum == ctx->block_count-1) ? ctx->tail_block_size : ctx->block_size;
623 // Update the context with the results
624 ctx->crc_read_block_result = rd_flush_rslt;
625 ctx->crc_read_block_ndigested = 0;
626 return rslt;
630 * Clean-up memory used by helper libraries, that don't know about apr_palloc
631 * and that (probably) use classical malloc/free
633 static apr_status_t deflate_ctx_cleanup(void *data)
635 crccache_ctx *ctx = (crccache_ctx *)data;
637 if (ctx != NULL)
639 if (ctx->compression_state != COMPRESSION_ENDED)
641 deflateEnd(ctx->compression_stream);
642 ctx->compression_state = COMPRESSION_ENDED;
644 if (ctx->crcctx != NULL)
646 crc_context_free(ctx->crcctx);
647 ctx->crcctx = NULL;
650 return APR_SUCCESS;
653 * End of stream has been reached:
654 * Process any data still in the buffer and flush all internal
655 * structures of crcsync and of zlib
656 * Furthermore, add a strong hash
658 static apr_status_t process_eos(ap_filter_t *f)
660 crccache_ctx *ctx = f->ctx;
661 apr_status_t rslt;
663 ap_log_error(APLOG_MARK, APLOG_DEBUG, APR_SUCCESS, f->r->server,"CRCCACHE-ENCODE EOS reached for APR bucket");
666 while (ctx->buffer_digest_getpos < ctx->buffer_putpos)
668 // There is still data in the buffer. Process it.
669 rslt = process_block(f);
670 if (rslt != APR_SUCCESS)
672 return rslt;
678 // Flush remaining block in the crcctx
679 rslt = flush_block(f);
680 if (rslt != APR_SUCCESS)
682 return rslt;
685 while (ctx->crc_read_block_result != 0);
687 // Flush anything that is remaining in the compress buffer
688 rslt = flush_compress_buffer(f);
689 if (rslt != APR_SUCCESS)
691 return rslt;
694 unsigned md_len;
695 unsigned char md_value[EVP_MAX_MD_SIZE];
696 EVP_DigestFinal_ex(&ctx->mdctx, md_value, &md_len);
697 EVP_MD_CTX_cleanup(&ctx->mdctx);
698 write_hash(f, md_value, md_len);
700 ap_log_error_wrapper(APLOG_MARK, APLOG_DEBUG, APR_SUCCESS, f->r->server,
701 "CRCCACHE-ENCODE complete size %f%% (encoded-uncompressed=%zu encoded=%zu original=%zu",100.0*((float)ctx->tx_length/(float)ctx->orig_length),ctx->tx_uncompressed_length, ctx->tx_length, ctx->orig_length);
703 return APR_SUCCESS;
707 * Process a data bucket; append data into a moving window buffer
708 * and encode it with crcsync algorithm when window contains enough
709 * data for crcsync to find potential matches
711 static apr_status_t process_data_bucket(ap_filter_t *f, apr_bucket *e)
713 request_rec *r = f->r;
714 crccache_ctx *ctx = f->ctx;
716 const char *data;
717 apr_size_t len;
718 apr_status_t rslt;
720 /* read */
721 apr_bucket_read(e, &data, &len, APR_BLOCK_READ);
722 ctx->orig_length += len;
723 // update our sha1 hash
724 EVP_DigestUpdate(&ctx->mdctx, data, len);
725 // ap_log_error(APLOG_MARK, APLOG_DEBUG, APR_SUCCESS, r->server,"CRCCACHE-ENCODE normal data in APR bucket, read %ld", len);
727 // append data to the buffer and encode buffer content using the crc_read_block magic
728 size_t bucket_used_count = 0;
729 size_t bucket_data_left;
730 while(bucket_used_count < len)
732 /* Append as much data as possible into the buffer */
733 bucket_data_left = len - bucket_used_count;
734 size_t copy_size = MIN(ctx->buffer_size-ctx->buffer_putpos, bucket_data_left);
735 memcpy(ctx->buffer+ctx->buffer_putpos, data+bucket_used_count, copy_size);
736 bucket_used_count += copy_size;
737 bucket_data_left -= copy_size;
738 ctx->buffer_putpos += copy_size;
739 /* flush the buffer if it is appropriate */
740 if (ctx->buffer_putpos == ctx->buffer_size)
742 // Buffer is filled to the end. Flush as much as possible
743 ap_log_error_wrapper(APLOG_MARK, APLOG_DEBUG, APR_SUCCESS, r->server,
744 "CRCCACHE-ENCODE Buffer is filled to end, read_getpos: %zu, digest_getpos: %zu, putpos: %zu, putpos-digest_getpos: %zu (tail_block_size: %zu)",
745 ctx->buffer_read_getpos, ctx->buffer_digest_getpos, ctx->buffer_putpos, ctx->buffer_putpos-ctx->buffer_digest_getpos, ctx->tail_block_size);
746 while (ctx->buffer_putpos - ctx->buffer_digest_getpos > ctx->tail_block_size)
748 // We can still scan at least 1 tail block + 1 byte forward: try to flush next part
749 rslt = process_block(f);
750 if (rslt != APR_SUCCESS)
752 return rslt;
754 ap_log_error_wrapper(APLOG_MARK, APLOG_DEBUG, APR_SUCCESS, r->server,
755 "CRCCACHE-ENCODE Processed a block, read_getpos: %zu, digest_getpos: %zu, putpos: %zu, putpos-digest_getpos: %zu (tail_block_size: %zu)",
756 ctx->buffer_read_getpos, ctx->buffer_digest_getpos, ctx->buffer_putpos, ctx->buffer_putpos-ctx->buffer_digest_getpos, ctx->tail_block_size);
759 if (ctx->buffer_putpos != ctx->buffer_read_getpos)
761 // Copy the remaining part of the buffer to the start of the buffer,
762 // so that it can be filled again as new data arrive
763 ap_log_error_wrapper(APLOG_MARK, APLOG_DEBUG, APR_SUCCESS, r->server,
764 "CRCCACHE-ENCODE Moving %zu bytes to begin of buffer",
765 ctx->buffer_putpos - ctx->buffer_read_getpos);
766 memcpy(ctx->buffer, ctx->buffer + ctx->buffer_read_getpos, ctx->buffer_putpos - ctx->buffer_read_getpos);
768 // Reset getpos to the beginning of the buffer and putpos accordingly
769 ctx->buffer_putpos -= ctx->buffer_read_getpos;
770 ctx->buffer_digest_getpos -= ctx->buffer_read_getpos;
771 ctx->buffer_read_getpos = 0;
773 while (ctx->crc_read_block_result < 0 && ctx->buffer_putpos - ctx->buffer_digest_getpos > ctx->tail_block_size)
775 // Previous block matched exactly. Let's hope the next block as well
776 ap_log_error_wrapper(APLOG_MARK, APLOG_DEBUG, APR_SUCCESS, r->server,
777 "CRCCACHE-ENCODE Previous block matched, read_getpos: %zu, digest_getpos: %zu, putpos: %zu, putpos-digest_getpos: %zu (tail_block_size: %zu)",
778 ctx->buffer_read_getpos, ctx->buffer_digest_getpos, ctx->buffer_putpos, ctx->buffer_putpos-ctx->buffer_digest_getpos, ctx->tail_block_size);
779 rslt = process_block(f);
780 if (rslt != APR_SUCCESS)
782 return rslt;
786 return APR_SUCCESS; // Yahoo, all went well
790 * CACHE_OUT filter
791 * ----------------
793 * Deliver cached content (headers and body) up the stack.
795 static apr_status_t crccache_out_filter(ap_filter_t *f, apr_bucket_brigade *bb) {
796 apr_bucket *e;
797 request_rec *r = f->r;
798 crccache_ctx *ctx = f->ctx;
799 int zRC;
800 int return_code = APR_SUCCESS;
802 /* Do nothing if asked to filter nothing. */
803 if (APR_BRIGADE_EMPTY(bb)) {
804 ap_log_error(APLOG_MARK, APLOG_DEBUG, APR_SUCCESS, r->server,"CRCCACHE-ENCODE bucket brigade is empty -> nothing todo");
805 return ap_pass_brigade(f->next, bb);
808 /* If state is not yet GS_ENCODING content, we need to ensure that it is okay to send
809 * the encoded content. If the state is GS_ENCODING, that means we've done
810 * this before and we liked it.
811 * This could be not so nice if we always fail. But, if we succeed,
812 * we're in better shape.
814 if (ctx->global_state != GS_ENCODING)
816 const char *encoding;
818 /* only work on main request/no subrequests */
819 if (r->main != NULL) {
820 ap_remove_output_filter(f);
821 return ap_pass_brigade(f->next, bb);
824 /* We can't operate on Content-Ranges */
825 if (apr_table_get(r->headers_out, "Content-Range") != NULL) {
826 ap_remove_output_filter(f);
827 return ap_pass_brigade(f->next, bb);
830 if (ctx->global_state != GS_HEADERS_SAVED)
832 ap_log_error(APLOG_MARK, APLOG_ERR, APR_SUCCESS, r->server, "CRCCACHE-ENCODE unexpected ctx-state: %d, expected: %d", ctx->global_state, GS_HEADERS_SAVED);
833 return APR_EGENERAL;
836 /* Indicate to caches that they may only re-use this response for a request
837 * with the same BLOCK_HEADER value as the current request
838 * Indicate to clients that the server supports crcsync, even if checks
839 * further down prevent this specific response from being crc-encoded
841 apr_table_mergen(r->headers_out, VARY_HEADER, BLOCK_HEADER);
843 /* If Content-Encoding is present and differs from "identity", we can't handle it */
844 encoding = apr_table_get(r->headers_out, ENCODING_HEADER);
845 if (encoding && strcasecmp(encoding, "identity")) {
846 ap_log_error(APLOG_MARK, APLOG_INFO, APR_SUCCESS, r->server,
847 "Not encoding with crccache. It is already encoded with: %s", encoding);
848 ap_remove_output_filter(f);
849 return ap_pass_brigade(f->next, bb);
852 /* For a 304 or 204 response there is no entity included in
853 * the response and hence nothing to crc-encode. */
854 if (r->status == HTTP_NOT_MODIFIED || r->status ==HTTP_NO_CONTENT)
856 ap_remove_output_filter(f);
857 return ap_pass_brigade(f->next, bb);
860 /* All Ok. We're cool with filtering this. */
861 ctx->global_state = GS_ENCODING;
862 ctx->debug_skip_writing = 0;
863 ctx->orig_length = 0;
864 ctx->tx_length = 0;
865 ctx->tx_uncompressed_length = 0;
866 ctx->bb = apr_brigade_create(r->pool, f->c->bucket_alloc);
868 /* Parse the input headers */
869 const char * header;
870 header = apr_table_get(r->headers_in, BLOCK_HEADER);
871 int version;
872 size_t file_size;
873 char * hashes;
874 if (decode_if_block_header(header,&version,&file_size,&hashes) < 0)
876 ap_log_error(APLOG_MARK, APLOG_ERR, 0, r->server,"crccache: failed to decode if-block header");
877 ap_remove_output_filter(f);
878 return ap_pass_brigade(f->next, bb);
880 // Decode the hashes
881 ctx->block_count = apr_base64_decode_len(hashes)/(HASH_SIZE/8);
882 // this may over allocate by a couple of bytes but no big deal
883 ctx->hashes = apr_palloc(r->pool, apr_base64_decode_len(hashes));
884 apr_base64_decode((char *)ctx->hashes, hashes);
885 free(hashes);
886 hashes = NULL;
888 ctx->block_size = file_size/ctx->block_count;
889 ctx->tail_block_size = ctx->block_size + file_size % ctx->block_count;
890 size_t block_count_including_final_block = ctx->block_count;// + (ctx->tail_block_size != 0);
891 ap_log_error(APLOG_MARK, APLOG_INFO, APR_SUCCESS, r->server,
892 "If-block header decoded, version %d: %d hashes of %d and one of %d", version, ctx->block_count-1,(int)ctx->block_size,(int)ctx->tail_block_size);
894 // swap to network byte order
895 int i;
896 for (i = 0; i < block_count_including_final_block;++i)
898 htobe64(ctx->hashes[i]);
901 // Data come in at chunks that are potentially smaller then block_size or tail_block_size
902 // Accumulate those chunks into a buffer.
903 // The buffer must be at least block_size+tail_block_size so that crc_read_block(...) can find a matching block, regardless
904 // of the data alignment compared to the original page.
905 // The buffer is basically a moving window in the new page. So sometimes the last part of the buffer must be
906 // copied to the beginning again. The larger the buffer, the less often such a copy operation is required
907 // Though, the larger the buffer, the bigger the memory demand.
908 // A size of 3*block_size+tail_block_size+1 (20% of original file size) seems to be a good balance
910 // TODO: tune the buffer-size depending on the mime-type. Already compressed data (zip, gif, jpg, mpg, etc) will
911 // probably only have matching blocks if the file is totally unmodified. As soon as one byte differs in the original
912 // uncompressed data, the entire compressed data stream will be different anyway, so in such case it does not make
913 // much sense to even keep invoking the crc_read_block(...) function as soon as a difference has been found.
914 // Hence, no need to make a (potentially huge) buffer for these type of compressed (potentially huge, think about movies)
915 // data types.
916 ctx->buffer_size = ctx->block_size*3 + ctx->tail_block_size + 1;
917 ctx->buffer_digest_getpos = 0;
918 ctx->buffer_read_getpos = 0;
919 ctx->buffer_putpos = 0;
920 ctx->crc_read_block_result = 0;
921 ctx->buffer = apr_palloc(r->pool, ctx->buffer_size);
923 /* Setup deflate for compressing non-matched literal data */
924 ctx->compression_state = COMPRESSION_BUFFER_EMPTY;
925 // TODO: should I pass some apr_palloc based function to prevent memory leaks
926 //in case of unexpected errors?
928 ap_log_error_wrapper(APLOG_MARK, APLOG_DEBUG, APR_SUCCESS, r->server,"CRCCACHE-ENCODE size of compression stream: %zd",sizeof(*(ctx->compression_stream)));
929 ctx->compression_stream = apr_palloc(r->pool, sizeof(*(ctx->compression_stream)));
930 ctx->compression_stream->zalloc = Z_NULL;
931 ctx->compression_stream->zfree = Z_NULL;
932 ctx->compression_stream->opaque = Z_NULL;
933 zRC = deflateInit(ctx->compression_stream, Z_DEFAULT_COMPRESSION); // TODO: make compression level configurable
934 if (zRC != Z_OK)
936 // Can't initialize the compression engine for compressing literal data
937 deflateEnd(ctx->compression_stream); // free memory used by deflate
938 free(ctx->compression_stream);
939 ctx->compression_stream = NULL;
940 ap_log_rerror(APLOG_MARK, APLOG_ERR, 0, r,
941 "unable to init Zlib: "
942 "deflateInit returned %d: URL %s",
943 zRC, r->uri);
944 ap_remove_output_filter(f);
945 return ap_pass_brigade(f->next, bb);
948 // initialise the context for our sha1 digest of the unencoded response
949 EVP_MD_CTX_init(&ctx->mdctx);
950 const EVP_MD *md = EVP_sha1();
951 EVP_DigestInit_ex(&ctx->mdctx, md, NULL);
953 // now initialise the crcsync context that will do the real work
954 ctx->crcctx = crc_context_new(ctx->block_size, HASH_SIZE,ctx->hashes, block_count_including_final_block, ctx->tail_block_size);
956 // Register a cleanup function to cleanup internal libz and crcsync resources
957 apr_pool_cleanup_register(r->pool, ctx, deflate_ctx_cleanup,
958 apr_pool_cleanup_null);
960 // All checks and initializations are OK
961 // Modify headers that are impacted by this transformation
962 apr_table_setn(r->headers_out, ENCODING_HEADER, CRCCACHE_ENCODING);
963 apr_table_unset(r->headers_out, "Content-Length");
964 apr_table_unset(r->headers_out, "Content-MD5");
965 crccache_check_etag(r, ctx, CRCCACHE_ENCODING);
967 ap_log_error(APLOG_MARK, APLOG_DEBUG, APR_SUCCESS, r->server, "CRCCACHE Server end of context setup");
970 if (ctx->global_state != GS_ENCODING)
972 ap_log_error(APLOG_MARK, APLOG_ERR, APR_SUCCESS, r->server, "CRCCACHE-ENCODE unexpected ctx-state: %d, expected: %d", ctx->global_state, GS_ENCODING);
973 return APR_EGENERAL;
976 while (!APR_BRIGADE_EMPTY(bb))
978 const char *data;
979 apr_size_t len;
980 apr_status_t rslt;
982 e = APR_BRIGADE_FIRST(bb);
984 if (APR_BUCKET_IS_EOS(e))
986 // Process end of stream: flush data buffers, compression buffers, etc.
987 // and calculate a strong hash.
988 rslt = process_eos(f);
990 /* Remove EOS from the old list, and insert into the new. */
991 APR_BUCKET_REMOVE(e);
992 APR_BRIGADE_INSERT_TAIL(ctx->bb, e);
994 /* This filter is done once it has served up its content */
995 ap_remove_output_filter(f);
997 if (rslt != APR_SUCCESS)
999 return rslt; // A problem occurred. Abort the processing
1002 /* Okay, we've seen the EOS.
1003 * Time to pass it along down the chain.
1005 return ap_pass_brigade(f->next, ctx->bb);
1008 if (APR_BUCKET_IS_FLUSH(e))
1010 // ap_log_error(APLOG_MARK, APLOG_DEBUG, APR_SUCCESS, r->server,"CRCCACHE-ENCODE flush APR bucket");
1011 apr_status_t rv;
1013 /* Remove flush bucket from old brigade and insert into the new. */
1014 APR_BUCKET_REMOVE(e);
1015 // TODO: optimize; do not insert two consecutive flushes when no intermediate
1016 // output block was written
1017 APR_BRIGADE_INSERT_TAIL(ctx->bb, e);
1018 rv = ap_pass_brigade(f->next, ctx->bb);
1019 if (rv != APR_SUCCESS) {
1020 return rv;
1022 continue;
1025 if (APR_BUCKET_IS_METADATA(e)) {
1027 * Remove meta data bucket from old brigade and insert into the
1028 * new.
1030 apr_bucket_read(e, &data, &len, APR_BLOCK_READ);
1031 if (len > 2)
1032 ap_log_error_wrapper(APLOG_MARK, APLOG_DEBUG, APR_SUCCESS, r->server,
1033 "CRCCACHE-ENCODE Metadata, read %zu, %d %d %d",len,data[0],data[1],data[2]);
1034 else
1035 ap_log_error_wrapper(APLOG_MARK, APLOG_DEBUG, APR_SUCCESS, r->server,
1036 "CRCCACHE-ENCODE Metadata, read %zu",len);
1037 APR_BUCKET_REMOVE(e);
1038 APR_BRIGADE_INSERT_TAIL(ctx->bb, e);
1039 continue;
1042 // Bucket is none of the above types. Assume it is a data bucket
1043 // which means it can be encoded with the crcsync algorithm
1044 rslt = process_data_bucket(f, e);
1046 APR_BUCKET_REMOVE(e);
1047 if (rslt != APR_SUCCESS)
1049 break; // A problem occurred. Abort the processing
1053 apr_brigade_cleanup(bb);
1054 return return_code;
1059 * CACHE_OUT_SAVE_HEADERS filter
1060 * ----------------
1062 * Save headers into the context
1064 static apr_status_t crccache_out_save_headers_filter(ap_filter_t *f, apr_bucket_brigade *bb) {
1065 request_rec *r = f->r;
1066 crccache_ctx *ctx = f->ctx;
1068 /* Do nothing if asked to filter nothing. */
1069 if (APR_BRIGADE_EMPTY(bb)) {
1070 ap_log_error(APLOG_MARK, APLOG_DEBUG, APR_SUCCESS, r->server,"CRCCACHE-ENCODE (save headers) bucket brigade is empty -> nothing todo");
1071 return ap_pass_brigade(f->next, bb);
1074 if (ctx->global_state != GS_INIT)
1076 ap_log_error(APLOG_MARK, APLOG_ERR, APR_SUCCESS, r->server, "CRCCACHE-ENCODE (save headers) unexpected ctx-state: %d, expected: %d", ctx->global_state, GS_INIT);
1077 return APR_EGENERAL;
1080 /* only work on main request/no subrequests */
1081 if (r->main != NULL) {
1082 ap_remove_output_filter(f);
1083 return ap_pass_brigade(f->next, bb);
1086 /* We can't operate on Content-Ranges */
1087 if (apr_table_get(r->headers_out, "Content-Range") != NULL) {
1088 ap_remove_output_filter(f);
1089 return ap_pass_brigade(f->next, bb);
1092 /* Save content-encoding and etag header for later usage by the crcsync
1093 * encoder
1095 const char *encoding = apr_table_get(r->headers_out, ENCODING_HEADER);
1096 if (encoding != NULL)
1098 ctx->old_content_encoding = apr_pstrdup(r->pool, encoding);
1099 ap_log_error(APLOG_MARK, APLOG_INFO, APR_SUCCESS, r->server,
1100 "Saved old content-encoding: %s", encoding);
1102 const char *etag = apr_table_get(r->headers_out, ETAG_HEADER);
1103 if (etag != NULL)
1105 ctx->old_etag = apr_pstrdup(r->pool, etag);
1106 ap_log_error(APLOG_MARK, APLOG_INFO, APR_SUCCESS, r->server,
1107 "Saved old etag: %s", etag);
1109 ctx->global_state = GS_HEADERS_SAVED;
1111 /* Done saving headers. Nothing left to do */
1112 ap_remove_output_filter(f);
1113 return ap_pass_brigade(f->next, bb);
1117 static void crccache_server_register_hook(apr_pool_t *p) {
1118 ap_log_error(APLOG_MARK, APLOG_INFO, 0, NULL,
1119 "Registering crccache server module, (C) 2009, Toby Collett and Alex Wulms");
1121 ap_hook_header_parser(crccache_server_header_parser_handler, NULL, NULL,
1122 APR_HOOK_MIDDLE);
1124 ap_register_output_filter("CRCCACHE_HEADER", crccache_server_header_filter_handler,
1125 NULL, AP_FTYPE_PROTOCOL);
1127 crccache_out_save_headers_filter_handle = ap_register_output_filter("CRCCACHE_OUT_SAVE_HEADERS",
1128 crccache_out_save_headers_filter, NULL, AP_FTYPE_RESOURCE-1); // make sure to handle it *before* INFLATE filter (or other decode modules)
1130 crccache_out_filter_handle = ap_register_output_filter("CRCCACHE_OUT",
1131 crccache_out_filter, NULL, AP_FTYPE_CONTENT_SET);
1134 module AP_MODULE_DECLARE_DATA crccache_server_module = {
1135 STANDARD20_MODULE_STUFF, NULL, /* create per-directory config structure */
1136 NULL , /* merge per-directory config structures */
1137 crccache_server_create_config, /* create per-server config structure */
1138 NULL , /* merge per-server config structures */
1139 crccache_server_cmds, /* command apr_table_t */
1140 crccache_server_register_hook /* register hooks */