removed some of the debug logging and added author details
[httpd-crcsyncproxy.git] / crccache / mod_crccache_server.c
blob33160344807e5e1bc83a7589b9ab11c1d69d0571
1 /* Licensed to the Apache Software Foundation (ASF) under one or more
2 * contributor license agreements. See the NOTICE file distributed with
3 * this work for additional information regarding copyright ownership.
4 * The ASF licenses this file to You under the Apache License, Version 2.0
5 * (the "License"); you may not use this file except in compliance with
6 * the License. You may obtain a copy of the License at
8 * http://www.apache.org/licenses/LICENSE-2.0
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
17 /* crcsync/crccache apache server module
19 * This module is designed to run as a proxy server on the remote end of a slow
20 * internet link. This module uses a crc32 running hash algorithm to reduce
21 * data transfer in cached but modified downstream files.
23 * CRC algorithm uses the crcsync library created by Rusty Russell
25 * Author: Toby Collett (2009)
29 #include "apr_file_io.h"
30 #include "apr_strings.h"
31 #include "mod_cache.h"
32 #include "mod_disk_cache.h"
33 #include "ap_provider.h"
34 #include "util_filter.h"
35 #include "util_script.h"
36 #include "util_charset.h"
38 #include "crccache.h"
39 #include "mod_crccache_server.h"
41 #include <crcsync/crcsync.h>
/* Size constant; nothing in the code visible in this file refers to it. */
43 const int bufferSize = 1024;
/* Forward declaration of the module record so the directive handlers and
 * hooks below can pass its address to ap_get_module_config() before the
 * record itself is defined at the end of the file. */
45 module AP_MODULE_DECLARE_DATA crccache_server_module;
/* MIN is used by the output filter below; this local definition is disabled,
 * so the macro presumably comes from an included header -- TODO confirm. */
47 //#define MIN(X,Y) (X<Y?X:Y)
49 static void *create_config(apr_pool_t *p, server_rec *s) {
50 crccache_server_conf *conf = apr_pcalloc(p, sizeof(crccache_server_conf));
51 conf->disk_cache_conf = apr_pcalloc(p, sizeof(disk_cache_conf));
53 /* XXX: Set default values */
54 conf->enabled = 0;
55 conf->disk_cache_conf->dirlevels = DEFAULT_DIRLEVELS;
56 conf->disk_cache_conf->dirlength = DEFAULT_DIRLENGTH;
57 conf->disk_cache_conf->maxfs = DEFAULT_MAX_FILE_SIZE;
58 conf->disk_cache_conf->minfs = DEFAULT_MIN_FILE_SIZE;
60 conf->disk_cache_conf->cache_root = NULL;
61 conf->disk_cache_conf->cache_root_len = 0;
63 return conf;
/* Per-request state of the CRCCACHE_OUT output filter; allocated from the
 * request pool the first time the filter accepts a response. */
66 typedef struct crccache_ctx_t {
67 unsigned char *buffer; /* staging area of 2*block_size bytes for data not yet matched or emitted */
68 size_t buffer_count; /* number of valid bytes currently held in buffer */
69 apr_bucket_brigade *bb; /* encoded output accumulated until a FLUSH or EOS bucket is seen */
70 size_t block_size; /* block length taken from the request's Block-Size header */
71 unsigned hashes[BLOCK_COUNT]; /* block hashes decoded from the request's Block-Hashes header */
72 struct crc_context *crcctx; /* crcsync matcher built from block_size and hashes */
73 size_t orig_length; /* running total of body bytes read from the input brigade */
74 size_t tx_length; /* running total of encoded bytes queued for sending */
75 } crccache_ctx;
78 * mod_disk_cache configuration directives handlers.
80 static const char *set_cache_root(cmd_parms *parms, void *in_struct_ptr,
81 const char *arg) {
82 crccache_server_conf *conf = ap_get_module_config(parms->server->module_config,
83 &crccache_server_module);
84 conf->disk_cache_conf->cache_root = arg;
85 conf->disk_cache_conf->cache_root_len = strlen(arg);
86 /* TODO: canonicalize cache_root and strip off any trailing slashes */
88 return NULL;
92 * Only enable CRCCache Server when requested through the config file
93 * so that the user can switch CRCCache server on in a specific virtual server
95 static const char *set_crccache_server(cmd_parms *parms, void *dummy, int flag)
97 crccache_server_conf *conf = ap_get_module_config(parms->server->module_config,
98 &crccache_server_module);
99 conf->enabled = flag;
100 return NULL;
105 * Consider eliminating the next two directives in favor of
106 * Ian's prime number hash...
107 * key = hash_fn( r->uri)
108 * filename = "/key % prime1 /key %prime2/key %prime3"
110 static const char *set_cache_dirlevels(cmd_parms *parms, void *in_struct_ptr,
111 const char *arg) {
112 crccache_server_conf *conf = ap_get_module_config(parms->server->module_config,
113 &crccache_server_module);
114 int val = atoi(arg);
115 if (val < 1)
116 return "CacheDirLevelsServer value must be an integer greater than 0";
117 if (val * conf->disk_cache_conf->dirlength > CACHEFILE_LEN)
118 return "CacheDirLevelsServer*CacheDirLengthServer value must not be higher than 20";
119 conf->disk_cache_conf->dirlevels = val;
120 return NULL;
122 static const char *set_cache_dirlength(cmd_parms *parms, void *in_struct_ptr,
123 const char *arg) {
124 crccache_server_conf *conf = ap_get_module_config(parms->server->module_config,
125 &crccache_server_module);
126 int val = atoi(arg);
127 if (val < 1)
128 return "CacheDirLengthServer value must be an integer greater than 0";
129 if (val * conf->disk_cache_conf->dirlevels > CACHEFILE_LEN)
130 return "CacheDirLevelsServer*CacheDirLengthServer value must not be higher than 20";
132 conf->disk_cache_conf->dirlength = val;
133 return NULL;
136 static const char *set_cache_minfs(cmd_parms *parms, void *in_struct_ptr,
137 const char *arg) {
138 crccache_server_conf *conf = ap_get_module_config(parms->server->module_config,
139 &crccache_server_module);
141 if (apr_strtoff(&conf->disk_cache_conf->minfs, arg, NULL, 0) != APR_SUCCESS || conf->disk_cache_conf->minfs
142 < 0) {
143 return "CacheMinFileSizeServer argument must be a non-negative integer representing the min size of a file to cache in bytes.";
145 return NULL;
148 static const char *set_cache_maxfs(cmd_parms *parms, void *in_struct_ptr,
149 const char *arg) {
150 crccache_server_conf *conf = ap_get_module_config(parms->server->module_config,
151 &crccache_server_module);
152 if (apr_strtoff(&conf->disk_cache_conf->maxfs, arg, NULL, 0) != APR_SUCCESS || conf->disk_cache_conf->maxfs
153 < 0) {
154 return "CacheMaxFileSizeServer argument must be a non-negative integer representing the max size of a file to cache in bytes.";
156 return NULL;
159 static const command_rec disk_cache_cmds[] = { AP_INIT_TAKE1("CacheRootServer", set_cache_root, NULL, RSRC_CONF,
160 "The directory to store cache files"), AP_INIT_TAKE1("CacheDirLevelsServer", set_cache_dirlevels, NULL, RSRC_CONF,
161 "The number of levels of subdirectories in the cache"), AP_INIT_TAKE1("CacheDirLengthServer", set_cache_dirlength, NULL, RSRC_CONF,
162 "The number of characters in subdirectory names"), AP_INIT_TAKE1("CacheMinFileSizeServer", set_cache_minfs, NULL, RSRC_CONF,
163 "The minimum file size to cache a document"), AP_INIT_TAKE1("CacheMaxFileSizeServer", set_cache_maxfs, NULL, RSRC_CONF,
164 "The maximum file size to cache a document"), AP_INIT_FLAG("CRCcacheServer", set_crccache_server, NULL, RSRC_CONF,
165 "Enable the CRCCache server in this virtual server"),{ NULL } };
/* Registration handle of the CRCCACHE_OUT output filter.  Assigned in
 * disk_cache_register_hook() and used by the header parser hook to attach
 * the filter to individual requests. */
167 static ap_filter_rec_t *crccache_out_filter_handle;
169 static int crccache_server_header_parser_handler(request_rec *r) {
170 crccache_server_conf *conf = ap_get_module_config(r->server->module_config,
171 &crccache_server_module);
172 if (conf->enabled)
174 const char * hashes, *block_size_header;
175 hashes = apr_table_get(r->headers_in, "Block-Hashes");
176 block_size_header = apr_table_get(r->headers_in, "Block-Size");
177 if (hashes && block_size_header)
179 size_t block_size;
180 int ret = sscanf(block_size_header,"%ld",&block_size);
181 if (ret < 0)
183 ap_log_error(APLOG_MARK, APLOG_ERR, 0, r->server, "crccache: failed to convert block size header to int, %s",block_size_header);
184 return OK;
187 ap_log_error(APLOG_MARK, APLOG_DEBUG, 0, r->server, "CRCSYNC: Block-Hashes header found so enabling protocol: %s",hashes);
188 ap_add_output_filter_handle(crccache_out_filter_handle,
189 NULL, r, r->connection);
194 return OK;
197 /* PR 39727: we're screwing up our clients if we leave a strong ETag
198 * header while transforming content. Henrik Nordstrom suggests
199 * appending ";gzip".
201 * Pending a more thorough review of our Etag handling, let's just
202 * implement his suggestion. It fixes the bug, or at least turns it
203 * from a showstopper to an inefficiency. And it breaks nothing that
204 * wasn't already broken.
206 static void crccache_check_etag(request_rec *r, const char *transform) {
207 const char *etag = apr_table_get(r->headers_out, "ETag");
208 if (etag && (((etag[0] != 'W') && (etag[0] != 'w')) || (etag[1] != '/'))) {
209 apr_table_set(r->headers_out, "ETag", apr_pstrcat(r->pool, etag, "-",
210 transform, NULL));
215 * CACHE_OUT filter
216 * ----------------
218 * Deliver cached content (headers and body) up the stack.
220 static int crccache_out_filter(ap_filter_t *f, apr_bucket_brigade *bb) {
221 apr_bucket *e;
222 request_rec *r = f->r;
223 crccache_ctx *ctx = f->ctx;
225 /* Do nothing if asked to filter nothing. */
226 if (APR_BRIGADE_EMPTY(bb)) {
227 return ap_pass_brigade(f->next, bb);
230 /* If we don't have a context, we need to ensure that it is okay to send
231 * the deflated content. If we have a context, that means we've done
232 * this before and we liked it.
233 * This could be not so nice if we always fail. But, if we succeed,
234 * we're in better shape.
236 if (!ctx)
238 const char *encoding;
240 /* only work on main request/no subrequests */
241 if (r->main != NULL) {
242 ap_remove_output_filter(f);
243 return ap_pass_brigade(f->next, bb);
246 /* We can't operate on Content-Ranges */
247 if (apr_table_get(r->headers_out, "Content-Range") != NULL) {
248 ap_remove_output_filter(f);
249 return ap_pass_brigade(f->next, bb);
252 /* Let's see what our current Content-Encoding is.
253 * If it's already encoded, don't compress again.
254 * (We could, but let's not.)
256 encoding = apr_table_get(r->headers_out, "Content-Encoding");
257 if (encoding && strcasecmp(CRCCACHE_ENCODING,encoding) == 0)
259 /* Even if we don't accept this request based on it not having
260 * the Accept-Encoding, we need to note that we were looking
261 * for this header and downstream proxies should be aware of that.
263 apr_table_mergen(r->headers_out, "Vary", "Accept-Encoding");
264 ap_remove_output_filter(f);
265 return ap_pass_brigade(f->next, bb);
268 /* For a 304 or 204 response there is no entity included in
269 * the response and hence nothing to deflate. */
270 if (r->status == HTTP_NOT_MODIFIED || r->status ==HTTP_NO_CONTENT)
272 ap_remove_output_filter(f);
273 return ap_pass_brigade(f->next, bb);
276 /* We're cool with filtering this. */
277 ctx = f->ctx = apr_pcalloc(r->pool, sizeof(*ctx));
278 ctx->orig_length = 0;
279 ctx->tx_length = 0;
280 ctx->bb = apr_brigade_create(r->pool, f->c->bucket_alloc);
282 /* If the entire Content-Encoding is "identity", we can replace it. */
283 if (!encoding || !strcasecmp(encoding, "identity")) {
284 apr_table_setn(r->headers_out, "Content-Encoding", CRCCACHE_ENCODING);
286 else {
287 apr_table_mergen(r->headers_out, "Content-Encoding", CRCCACHE_ENCODING);
289 apr_table_unset(r->headers_out, "Content-Length");
290 apr_table_unset(r->headers_out, "Content-MD5");
291 crccache_check_etag(r, CRCCACHE_ENCODING);
293 const char * hashes, *block_size_header;
294 hashes = apr_table_get(r->headers_in, "Block-Hashes");
295 block_size_header = apr_table_get(r->headers_in, "Block-Size");
297 ap_log_error(APLOG_MARK, APLOG_DEBUG, APR_SUCCESS, r->server,
298 "crccache encoding block size %s", block_size_header);
300 errno=0;
301 ctx->block_size = strtoull(block_size_header,NULL,0);
302 if (errno || ctx->block_size <= 0)
304 ap_log_error(APLOG_MARK, APLOG_ERR, 0, r->server,"crccache: failed to convert block size header to int, %s",block_size_header);
305 ap_remove_output_filter(f);
306 return ap_pass_brigade(f->next, bb);
309 // allocate a buffer of twice our block size so we can store non matching parts of data as it comes in
310 ctx->buffer_count = 0;
311 ctx->buffer = apr_palloc(r->pool, ctx->block_size*2);
313 int ii;
314 for (ii = 0; ii < BLOCK_COUNT; ++ii)
316 ctx->hashes[ii] = decode_30bithash(&hashes[ii*HASH_BASE64_SIZE_TX]);
317 //ap_log_error(APLOG_MARK, APLOG_DEBUG, APR_SUCCESS, r->server, "cache: decoded hash[%d] %08X",ii,ctx->hashes[ii]);
320 // now initialise the crcsync context that will do the real work
321 ctx->crcctx = crc_context_new(ctx->block_size, HASH_SIZE,ctx->hashes, BLOCK_COUNT);
328 while (!APR_BRIGADE_EMPTY(bb))
330 const char *data;
331 apr_size_t len;
333 e = APR_BRIGADE_FIRST(bb);
335 if (APR_BUCKET_IS_EOS(e))
337 // send one last literal if we still have unmatched data
338 if (ctx->buffer_count > 0)
340 //ap_log_error(APLOG_MARK, APLOG_DEBUG, APR_SUCCESS, r->server,"CRCCACHE-ENCODE final literal %ld bytes",ctx->buffer_count);
341 unsigned bucket_size = ctx->buffer_count + ENCODING_LITERAL_HEADER_SIZE;
342 ctx->tx_length += bucket_size;
343 char * buf = apr_palloc(r->pool, bucket_size);
345 buf[0] = ENCODING_LITERAL;
346 *(unsigned *)&buf[1] = htonl(ctx->buffer_count);
347 memcpy(&buf[5], ctx->buffer,ctx->buffer_count);
349 apr_bucket * b = apr_bucket_pool_create(buf, bucket_size, r->pool, f->c->bucket_alloc);
350 APR_BRIGADE_INSERT_TAIL(ctx->bb, b);
354 // TODO: add strong hash here
357 ap_log_error(APLOG_MARK, APLOG_DEBUG, APR_SUCCESS, r->server,
358 "CRCCACHE-ENCODE complete size %f%% (encoded=%ld original=%ld",100.0*((float)ctx->tx_length/(float)ctx->orig_length),ctx->tx_length, ctx->orig_length);
361 /* Remove EOS from the old list, and insert into the new. */
362 APR_BUCKET_REMOVE(e);
363 APR_BRIGADE_INSERT_TAIL(ctx->bb, e);
365 /* This filter is done once it has served up its content */
366 ap_remove_output_filter(f);
368 /* Okay, we've seen the EOS.
369 * Time to pass it along down the chain.
371 return ap_pass_brigade(f->next, ctx->bb);
374 if (APR_BUCKET_IS_FLUSH(e))
376 apr_status_t rv;
378 /* Remove flush bucket from old brigade and insert into the new. */
379 APR_BUCKET_REMOVE(e);
380 APR_BRIGADE_INSERT_TAIL(ctx->bb, e);
381 rv = ap_pass_brigade(f->next, ctx->bb);
382 if (rv != APR_SUCCESS) {
383 return rv;
385 continue;
388 if (APR_BUCKET_IS_METADATA(e)) {
390 * Remove meta data bucket from old brigade and insert into the
391 * new.
393 // TODO: do we need to encode metadata
394 apr_bucket_read(e, &data, &len, APR_BLOCK_READ);
395 if (len > 2)
396 ap_log_error(APLOG_MARK, APLOG_DEBUG, APR_SUCCESS, r->server,
397 "CRCCACHE-ENCODE: Metadata, read %ld, %d %d %d",len,data[0],data[1],data[2]);
398 else
399 ap_log_error(APLOG_MARK, APLOG_DEBUG, APR_SUCCESS, r->server,
400 "CRCCACHE-ENCODE: Metadata, read %ld",len);
401 APR_BUCKET_REMOVE(e);
402 APR_BRIGADE_INSERT_TAIL(ctx->bb, e);
403 continue;
406 /* read */
407 apr_bucket_read(e, &data, &len, APR_BLOCK_READ);
408 ctx->orig_length += len;
410 //ap_log_error(APLOG_MARK, APLOG_DEBUG, APR_SUCCESS, r->server,"cache: running CRCCACHE_OUT filter, read %ld bytes",len);
412 // TODO: make this a little more efficient so we need to copy less data around
413 size_t bucket_used_count = 0;
414 size_t data_left;
415 while(bucket_used_count < len)
417 const char * source_array = data;
418 size_t source_offset = bucket_used_count;
419 data_left = len - bucket_used_count;
420 size_t source_length = data_left;
421 // if we have some data in our buffer, we need to full up the buffer until we have enough to match a block
422 if (ctx->buffer_count > 0 || data_left < ctx->block_size)
424 size_t copy_size = MIN(ctx->block_size*2-ctx->buffer_count,data_left);
425 memcpy(&ctx->buffer[ctx->buffer_count],&data[bucket_used_count],copy_size);
426 ctx->buffer_count += copy_size;
427 bucket_used_count += copy_size;
428 data_left = len - bucket_used_count;
429 source_array = (char *)ctx->buffer;
430 source_offset = 0;
431 source_length = ctx->buffer_count;
432 // not enough to match a block so stop here
433 if (ctx->buffer_count < ctx->block_size)
434 break;
437 long result;
438 size_t count = crc_read_block(ctx->crcctx, &result,
439 &source_array[source_offset], source_length);;
441 //ap_log_error(APLOG_MARK, APLOG_DEBUG, APR_SUCCESS, r->server, "crccache: CRCSYNC, processed %ld, used %ld bytes, result was %ld",source_length,count,result);
443 // do different things if we match a literal or block
444 if (result > 0)
446 // didnt match a block, send a literal
448 // if we matched all our data as a literal
449 // update our used byte count, we can only be sure that 1+count-blocksize bytes are not in a block
450 // as the tail end of the buffer could match when more data is added to it.
451 if (count == source_length)
453 if (count > (ctx->block_size -1))
454 count -=(ctx->block_size -1);
455 else
456 count = 0;
459 if (count > 0)
461 ap_log_error(APLOG_MARK, APLOG_DEBUG, APR_SUCCESS, r->server,"CRCCACHE-ENCODE literal %ld bytes",count);
462 unsigned bucket_size = count + ENCODING_LITERAL_HEADER_SIZE;
463 ctx->tx_length += bucket_size;
464 char * buf = apr_palloc(r->pool, bucket_size);
466 buf[0] = ENCODING_LITERAL;
467 *(unsigned *)&buf[1] = htonl(count);
468 memcpy(&buf[5],&source_array[source_offset],count);
470 apr_bucket * b = apr_bucket_pool_create(buf, bucket_size, r->pool, f->c->bucket_alloc);
471 APR_BRIGADE_INSERT_TAIL(ctx->bb, b);
474 else if (result < 0)
476 // matched send a block
477 unsigned bucket_size = ENCODING_BLOCK_HEADER_SIZE;
478 ctx->tx_length += bucket_size;
479 char * buf = apr_palloc(r->pool, bucket_size);
481 // we used a block of data
482 count = ctx->block_size;
484 buf[0] = ENCODING_BLOCK;
485 buf[1] = (unsigned char) (result * -1 - 1); // invert and get back to zero based
486 ap_log_error(APLOG_MARK, APLOG_DEBUG, APR_SUCCESS, r->server,"CRCCACHE-ENCODE block %d",buf[1]);
487 apr_bucket * b = apr_bucket_pool_create(buf, bucket_size, r->pool, f->c->bucket_alloc);
488 APR_BRIGADE_INSERT_TAIL(ctx->bb, b);
490 else
492 // something odd happened here
493 ap_log_error(APLOG_MARK, APLOG_DEBUG, APR_SUCCESS, r->server,
494 "crccache: CRCSYNC, no data, processed %ld bytes, result was %ld",count,result);
497 if (ctx->buffer_count > 0)
499 // if we have used up all of our buffer, stop using it and use the bucket directly
500 if (ctx->buffer_count - count < bucket_used_count)
502 size_t extra_data = ctx->buffer_count - bucket_used_count;
503 bucket_used_count = count - extra_data;
504 ctx->buffer_count = 0;
506 else
508 // otherwise memmove the unused data to the start of the buffer
509 memmove(ctx->buffer,&ctx->buffer[count],ctx->buffer_count - count);
510 ctx->buffer_count -= count;
511 bucket_used_count += count;
514 else
516 bucket_used_count += count;
520 APR_BUCKET_REMOVE(e);
524 apr_brigade_cleanup(bb);
525 return APR_SUCCESS;
528 static void disk_cache_register_hook(apr_pool_t *p) {
529 ap_log_error(APLOG_MARK, APLOG_INFO, 0, NULL,
530 "Registering crccache server module, (C) 2009, Toby Collett");
532 ap_hook_header_parser(crccache_server_header_parser_handler, NULL, NULL,
533 APR_HOOK_MIDDLE);
536 * CACHE_OUT must go into the filter chain after a possible DEFLATE
537 * filter to ensure that already compressed cache objects do not
538 * get compressed again. Incrementing filter type by 1 ensures
539 * his happens.
541 crccache_out_filter_handle = ap_register_output_filter("CRCCACHE_OUT",
542 crccache_out_filter, NULL, AP_FTYPE_CONTENT_SET + 1);
545 module AP_MODULE_DECLARE_DATA crccache_server_module = {
546 STANDARD20_MODULE_STUFF, NULL, /* create per-directory config structure */
547 NULL , /* merge per-directory config structures */
548 create_config, /* create per-server config structure */
549 NULL , /* merge per-server config structures */
550 disk_cache_cmds, /* command apr_table_t */
551 disk_cache_register_hook /* register hooks */