Properly support 'merged' tail block
[httpd-crcsyncproxy.git] / crccache / mod_crccache_client.c
blobaa6e90964b42212e724048334fffd6e00599c484
1 /* Licensed to the Apache Software Foundation (ASF) under one or more
2 * contributor license agreements. See the NOTICE file distributed with
3 * this work for additional information regarding copyright ownership.
4 * The ASF licenses this file to You under the Apache License, Version 2.0
5 * (the "License"); you may not use this file except in compliance with
6 * the License. You may obtain a copy of the License at
8 * http://www.apache.org/licenses/LICENSE-2.0
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
17 /* crcsync/crccache apache client module
19 * This module is designed to run as a cache server on the local end of a slow
20 * internet link. This module uses a crc running hash algorithm to reduce
21 * data transfer in cached but modified upstream files.
23 * CRC algorithm uses the crcsync library created by Rusty Russel
25 * Author: Toby Collett (2009)
26 * Contributor: Alex Wulms (2009)
32 #include <assert.h>
34 #include <apr_file_io.h>
35 #include <apr_strings.h>
36 #include <apr_base64.h>
37 #include <apr_lib.h>
38 #include <apr_date.h>
39 #include <apr_tables.h>
40 #include "ap_provider.h"
41 #include "util_filter.h"
42 #include "util_script.h"
43 #include "util_charset.h"
44 #include <http_log.h>
45 #include <http_protocol.h>
47 #include "crccache.h"
48 #include "ap_wrapper.h"
49 #include <crcsync/crcsync.h>
50 #include <crc/crc.h>
51 #include <zlib.h>
53 #include "mod_crccache_client.h"
55 static ap_filter_rec_t *crccache_decode_filter_handle;
56 static ap_filter_rec_t *cache_save_filter_handle;
57 static ap_filter_rec_t *cache_save_subreq_filter_handle;
59 module AP_MODULE_DECLARE_DATA crccache_client_module;
60 APR_OPTIONAL_FN_TYPE(ap_cache_generate_key) *cache_generate_key;
63 static int crccache_client_post_config(apr_pool_t *p, apr_pool_t *plog,
64 apr_pool_t *ptemp, server_rec *s)
66 /* This is the means by which unusual (non-unix) os's may find alternate
67 * means to run a given command (e.g. shebang/registry parsing on Win32)
69 cache_generate_key = APR_RETRIEVE_OPTIONAL_FN(ap_cache_generate_key);
70 if (!cache_generate_key) {
71 cache_generate_key = cache_generate_key_default;
73 return OK;
77 /**
78 * Clean-up memory used by helper libraries, that don't know about apr_palloc
79 * and that (probably) use classical malloc/free
81 apr_status_t deflate_ctx_cleanup(void *data)
83 crccache_client_ctx *ctx = (crccache_client_ctx *)data;
85 if (ctx != NULL)
87 if (ctx->decompression_state != DECOMPRESSION_ENDED)
89 inflateEnd(ctx->decompression_stream);
90 ctx->decompression_state = DECOMPRESSION_ENDED;
93 return APR_SUCCESS;
98 * Reads headers from a buffer and returns an array of headers.
99 * Returns NULL on file error
100 * This routine tries to deal with too long lines and continuation lines.
101 * @@@: XXX: FIXME: currently the headers are passed thru un-merged.
102 * Is that okay, or should they be collapsed where possible?
104 apr_status_t recall_headers(cache_handle_t *h, request_rec *r) {
105 const char *data;
106 apr_size_t len;
107 apr_bucket *e;
108 unsigned i;
109 int z_RC;
111 disk_cache_object_t *dobj = (disk_cache_object_t *) h->cache_obj->vobj;
113 /* This case should not happen... */
114 if (!dobj->hfd) {
115 /* XXX log message */
116 return APR_NOTFOUND;
119 h->req_hdrs = apr_table_make(r->pool, 20);
120 h->resp_hdrs = apr_table_make(r->pool, 20);
122 /* Call routine to read the header lines/status line */
123 read_table(h, r, h->resp_hdrs, dobj->hfd);
124 read_table(h, r, h->req_hdrs, dobj->hfd);
126 e = apr_bucket_file_create(dobj->fd, 0, (apr_size_t) dobj->file_size, r->pool,
127 r->connection->bucket_alloc);
129 /* read */
130 apr_bucket_read(e, &data, &len, APR_BLOCK_READ);
132 // this will be rounded down, but thats okay
133 size_t blocksize = len/FULL_BLOCK_COUNT;
134 size_t tail_block_size = blocksize + len % FULL_BLOCK_COUNT;
135 size_t block_count_including_final_block = FULL_BLOCK_COUNT;// + (tail_block_size != 0);
136 // sanity check for very small files
137 if (blocksize> 4)
139 ap_log_error(APLOG_MARK, APLOG_DEBUG, APR_SUCCESS, r->server,"crccache: %d blocks of %ld bytes, one block of %ld bytes",FULL_BLOCK_COUNT-1,blocksize,tail_block_size);
141 crccache_client_ctx * ctx;
142 ctx = apr_pcalloc(r->pool, sizeof(*ctx));
143 ctx->bb = apr_brigade_create(r->pool, r->connection->bucket_alloc);
144 ctx->block_size = blocksize;
145 ctx->tail_block_size = tail_block_size;
146 ctx->state = DECODING_NEW_SECTION;
147 ctx->cached_bucket = e;
149 // Setup inflate for decompressing non-matched literal data
150 ctx->decompression_stream = apr_palloc(r->pool, sizeof(*(ctx->decompression_stream)));
151 ctx->decompression_stream->zalloc = Z_NULL;
152 ctx->decompression_stream->zfree = Z_NULL;
153 ctx->decompression_stream->opaque = Z_NULL;
154 ctx->decompression_stream->avail_in = 0;
155 ctx->decompression_stream->next_in = Z_NULL;
156 z_RC = inflateInit(ctx->decompression_stream);
157 if (z_RC != Z_OK)
159 ap_log_error(APLOG_MARK, APLOG_WARNING, 0, r->server,
160 "Can not initialize decompression engine, return code: %d", z_RC);
161 return APR_SUCCESS;
163 ctx->decompression_state = DECOMPRESSION_INITIALIZED;
165 // Register a cleanup function to cleanup internal libz resources
166 apr_pool_cleanup_register(r->pool, ctx, deflate_ctx_cleanup,
167 apr_pool_cleanup_null);
169 // All OK to go for the crcsync decoding: add the headers
170 // and set-up the decoding filter
172 // add one for base 64 overflow and null terminator
173 char hash_set[HASH_HEADER_SIZE+1];
175 uint64_t crcs[block_count_including_final_block];
176 crc_of_blocks(data, len, blocksize, HASH_SIZE, true, crcs);
177 // for (i = 0; i < FULL_BLOCK_COUNT - 1; i++) {
178 // crcs[i] = crc64_iso(0, &data[i*blocksize], blocksize);
179 // }
180 // crcs[FULL_BLOCK_COUNT-1] = crc64_iso(0, &data[(FULL_BLOCK_COUNT-1)*blocksize], tail_block_size);
182 // swap to network byte order
183 for (i = 0; i < block_count_including_final_block;++i)
185 htobe64(crcs[i]);
188 apr_base64_encode (hash_set, (char *)crcs, block_count_including_final_block*sizeof(crcs[0]));
189 hash_set[HASH_HEADER_SIZE] = '\0';
190 //apr_bucket_delete(e);
192 // TODO; bit of a safety margin here, could calculate exact size
193 const int block_header_max_size = HASH_HEADER_SIZE+40;
194 char block_header_txt[block_header_max_size];
195 snprintf(block_header_txt, block_header_max_size,"v=1, fs=%zu, h=%s",len,hash_set);
196 apr_table_set(r->headers_in, BLOCK_HEADER, block_header_txt);
197 // TODO: do we want to cache the hashes here?
199 // initialise the context for our sha1 digest of the unencoded response
200 EVP_MD_CTX_init(&ctx->mdctx);
201 const EVP_MD *md = EVP_sha1();
202 EVP_DigestInit_ex(&ctx->mdctx, md, NULL);
204 // we want to add a filter here so that we can decode the response.
205 // we need access to the original cached data when we get the response as
206 // we need that to fill in the matched blocks.
207 ap_add_output_filter_handle(crccache_decode_filter_handle,
208 ctx, r, r->connection);
210 // TODO: why is hfd file only closed in this case?
211 apr_file_close(dobj->hfd);
213 ap_log_error(APLOG_MARK, APLOG_DEBUG, 0, r->server,
214 "crccache_client: Recalled headers for URL %s", dobj->name);
215 return APR_SUCCESS;
219 * CACHE_DECODE filter
220 * ----------------
222 * Deliver cached content (headers and body) up the stack.
224 static int crccache_decode_filter(ap_filter_t *f, apr_bucket_brigade *bb) {
225 apr_bucket *e;
226 request_rec *r = f->r;
227 // TODO: set up context type struct
228 crccache_client_ctx *ctx = f->ctx;
230 // if this is the first pass in decoding we should check the headers etc
231 // and fix up those headers that we modified as part of the encoding
232 if (ctx->headers_checked == 0)
234 ctx->headers_checked = 1;
236 ap_log_error(APLOG_MARK, APLOG_DEBUG, APR_SUCCESS, r->server,
237 "CRCSYNC returned status code (%d)", r->status);
239 // TODO: make this work if we have multiple encodings
240 const char * content_encoding;
241 content_encoding = apr_table_get(r->headers_out, ENCODING_HEADER);
242 if (content_encoding == NULL || strcmp(CRCCACHE_ENCODING, content_encoding)
243 != 0) {
244 ap_log_error(APLOG_MARK, APLOG_DEBUG, APR_SUCCESS, r->server,
245 "CRCSYNC not decoding, content encoding bad (%s)", content_encoding?content_encoding:"NULL");
246 ap_remove_output_filter(f);
247 return ap_pass_brigade(f->next, bb);
250 // remove the encoding header
251 apr_table_unset(r->headers_out, ENCODING_HEADER);
253 // remove If-Block from the Vary header
254 char * vary = apr_pstrdup(r->pool, apr_table_get(r->headers_out, "Vary"));
255 if (vary)
257 apr_table_unset(r->headers_out, "Vary");
258 char * tok;
259 char * last = NULL;
260 for (tok = apr_strtok(vary,", ",&last);tok != NULL;tok = apr_strtok(NULL,", ",&last))
262 if (strcmp(BLOCK_HEADER,tok)!=0)
264 apr_table_mergen(r->headers_out,"Vary",tok);
269 // fix up etag
270 char * etag = apr_pstrdup(r->pool, apr_table_get(r->headers_out, "etag"));
271 if (etag)
273 int etaglen = strlen(etag);
274 if (etaglen>strlen(CRCCACHE_ENCODING) + 1)
276 if (strcmp("-"CRCCACHE_ENCODING,&etag[etaglen-(strlen(CRCCACHE_ENCODING) + 1)])==0)
278 etag[etaglen-(strlen(CRCCACHE_ENCODING) + 1)] = '\0';
279 apr_table_setn(r->headers_out,"etag",etag);
287 /* Do nothing if asked to filter nothing. */
288 if (APR_BRIGADE_EMPTY(bb)) {
289 return ap_pass_brigade(f->next, bb);
292 /* We require that we have a context already, otherwise we dont have our cached file
293 * to fill in the gaps with.
295 if (!ctx) {
296 ap_log_error(APLOG_MARK, APLOG_DEBUG, APR_SUCCESS, r->server,
297 "No context available %s", r->uri);
298 ap_remove_output_filter(f);
299 return ap_pass_brigade(f->next, bb);
302 while (!APR_BRIGADE_EMPTY(bb))
304 const char *data;
305 apr_size_t len;
307 e = APR_BRIGADE_FIRST(bb);
309 if (APR_BUCKET_IS_EOS(e)) {
311 /* Remove EOS from the old list, and insert into the new. */
312 APR_BUCKET_REMOVE(e);
313 APR_BRIGADE_INSERT_TAIL(ctx->bb, e);
315 /* This filter is done once it has served up its content */
316 ap_remove_output_filter(f);
318 // check strong hash here
319 unsigned md_len;
320 unsigned char md_value[EVP_MAX_MD_SIZE];
321 EVP_DigestFinal_ex(&ctx->mdctx, md_value, &md_len);
322 EVP_MD_CTX_cleanup(&ctx->mdctx);
324 if (memcmp(md_value, ctx->md_value_rx, 20) != 0)
326 ap_log_error_wrapper(APLOG_MARK, APLOG_DEBUG, APR_SUCCESS, r->server,"CRCSYNC-DECODE HASH CHECK FAILED");
327 apr_brigade_cleanup(bb);
328 return APR_EGENERAL;
330 else
332 ap_log_error_wrapper(APLOG_MARK, APLOG_DEBUG, APR_SUCCESS, r->server,"CRCSYNC-DECODE HASH CHECK PASSED");
335 /* Okay, we've seen the EOS.
336 * Time to pass it along down the chain.
338 return ap_pass_brigade(f->next, ctx->bb);
341 if (APR_BUCKET_IS_FLUSH(e)) {
342 apr_status_t rv;
344 /* Remove flush bucket from old brigade anf insert into the new. */
345 APR_BUCKET_REMOVE(e);
346 APR_BRIGADE_INSERT_TAIL(ctx->bb, e);
347 rv = ap_pass_brigade(f->next, ctx->bb);
348 if (rv != APR_SUCCESS) {
349 return rv;
351 continue;
354 if (APR_BUCKET_IS_METADATA(e)) {
356 * Remove meta data bucket from old brigade and insert into the
357 * new.
359 APR_BUCKET_REMOVE(e);
360 APR_BRIGADE_INSERT_TAIL(ctx->bb, e);
361 continue;
364 /* read */
365 apr_bucket_read(e, &data, &len, APR_BLOCK_READ);
366 //ap_log_error_wrapper(APLOG_MARK, APLOG_DEBUG, APR_SUCCESS, r->server,"CRCSYNC-DECODE read %zd bytes",len);
368 apr_size_t consumed_bytes = 0;
369 while (consumed_bytes < len)
371 //ap_log_error_wrapper(APLOG_MARK, APLOG_DEBUG, APR_SUCCESS, r->server,"CRCSYNC-DECODE remaining %zd bytes",len - consumed_bytes);
372 // no guaruntee that our buckets line up with our encoding sections
373 // so we need a processing state machine stored in our context
374 switch (ctx->state)
376 case DECODING_NEW_SECTION:
378 // check if we have a compressed section or a block section
379 if (data[consumed_bytes] == ENCODING_COMPRESSED)
380 ctx->state = DECODING_COMPRESSED;
381 else if (data[consumed_bytes] == ENCODING_BLOCK)
382 ctx->state = DECODING_BLOCK_HEADER;
383 else if (data[consumed_bytes] == ENCODING_LITERAL)
385 ctx->state = DECODING_LITERAL_SIZE;
386 ctx->partial_literal = NULL;
387 ctx->rx_count = 0;
389 else if (data[consumed_bytes] == ENCODING_HASH)
391 ctx->state = DECODING_HASH;
392 ctx->rx_count = 0;
394 else
396 ap_log_error(APLOG_MARK, APLOG_ERR, APR_SUCCESS, r->server,
397 "CRCSYNC-DECODE, unknown section %d(%c)",data[consumed_bytes],data[consumed_bytes]);
398 apr_brigade_cleanup(bb);
399 return APR_EGENERAL;
401 //ap_log_error(APLOG_MARK, APLOG_DEBUG, APR_SUCCESS, r->server,"CRCSYNC-DECODE found a new section %d",ctx->state);
402 consumed_bytes++;
403 break;
405 case DECODING_BLOCK_HEADER:
407 unsigned char block_number = data[consumed_bytes];
408 consumed_bytes++;
409 ctx->state = DECODING_NEW_SECTION;
411 // TODO: Output the indicated block here
412 size_t current_block_size = block_number < FULL_BLOCK_COUNT-1 ? ctx->block_size : ctx->tail_block_size;
413 ap_log_error_wrapper(APLOG_MARK, APLOG_DEBUG, APR_SUCCESS, r->server,
414 "CRCSYNC-DECODE block section, block %d, size %zu" ,block_number, current_block_size);
416 char * buf = apr_palloc(r->pool, current_block_size);
417 const char * source_data;
418 size_t source_len;
419 apr_bucket_read(ctx->cached_bucket, &source_data, &source_len, APR_BLOCK_READ);
420 assert(block_number < (FULL_BLOCK_COUNT /*+ (ctx->tail_block_size != 0)*/));
421 memcpy(buf,&source_data[block_number*ctx->block_size],current_block_size);
422 // update our sha1 hash
423 EVP_DigestUpdate(&ctx->mdctx, buf, current_block_size);
424 apr_bucket * b = apr_bucket_pool_create(buf, current_block_size, r->pool, f->c->bucket_alloc);
425 APR_BRIGADE_INSERT_TAIL(ctx->bb, b);
426 break;
428 case DECODING_LITERAL_SIZE:
430 unsigned avail_in = len - consumed_bytes;
431 // if we havent got the full int then store the data for later
432 if (avail_in < 4 || ctx->rx_count != 0)
434 if (ctx->partial_literal == NULL)
436 ctx->partial_literal = apr_palloc(r->pool, 4);
438 unsigned len_to_copy = MIN(4-ctx->rx_count, avail_in);
439 memcpy(&ctx->partial_literal[ctx->rx_count], &data[consumed_bytes],len_to_copy);
440 ctx->rx_count += len_to_copy;
441 consumed_bytes += len_to_copy;
443 if (ctx->rx_count == 4)
445 ctx->literal_size = ntohl(*(unsigned*)ctx->partial_literal);
446 ctx->rx_count = 0;
448 else
450 break;
453 else
455 ctx->literal_size = ntohl(*(unsigned*)&data[consumed_bytes]);
456 consumed_bytes += 4;
458 ctx->partial_literal = apr_palloc(r->pool, ctx->literal_size);
459 ctx->state = DECODING_LITERAL_BODY;
460 break;
462 case DECODING_LITERAL_BODY:
464 unsigned avail_in = len - consumed_bytes;
465 unsigned len_to_copy = MIN(ctx->literal_size-ctx->rx_count, avail_in);
466 memcpy(&ctx->partial_literal[ctx->rx_count], &data[consumed_bytes],len_to_copy);
467 ctx->rx_count += len_to_copy;
468 consumed_bytes += len_to_copy;
470 if (ctx->rx_count == ctx->literal_size)
472 EVP_DigestUpdate(&ctx->mdctx, ctx->partial_literal, ctx->literal_size);
473 apr_bucket * b = apr_bucket_pool_create((char*)ctx->partial_literal, ctx->literal_size, r->pool, f->c->bucket_alloc);
474 APR_BRIGADE_INSERT_TAIL(ctx->bb, b);
475 ctx->state = DECODING_NEW_SECTION;
478 break;
480 case DECODING_HASH:
482 unsigned avail_in = len - consumed_bytes;
483 // 20 bytes for an SHA1 hash
484 unsigned needed = MIN(20-ctx->rx_count, avail_in);
485 memcpy(&ctx->md_value_rx[ctx->rx_count], &data[consumed_bytes],needed);
486 ctx->rx_count+=needed;
487 consumed_bytes += needed;
488 if (ctx->rx_count == 20)
490 ctx->state = DECODING_NEW_SECTION;
492 break;
494 case DECODING_COMPRESSED:
496 unsigned char decompressed_data_buf[30000];
497 int z_RC;
498 z_stream *strm = ctx->decompression_stream;
499 strm->avail_in = len - consumed_bytes;
500 strm->next_in = (Bytef *)(data + consumed_bytes);
501 // ap_log_error(APLOG_MARK, APLOG_DEBUG, APR_SUCCESS, r->server, "CRCSYNC-DECODE inflating %d bytes", strm.avail_in);
502 // ap_log_hex(APLOG_MARK, APLOG_DEBUG, APR_SUCCESS, r->server, strm.next_in, strm.avail_in);
503 do {
504 strm->avail_out = sizeof(decompressed_data_buf);
505 strm->next_out = decompressed_data_buf;
506 uInt avail_in_pre_inflate = strm->avail_in;
507 z_RC = inflate(strm, Z_NO_FLUSH);
508 if (z_RC == Z_NEED_DICT || z_RC == Z_DATA_ERROR || z_RC == Z_MEM_ERROR)
510 ap_log_error(APLOG_MARK, APLOG_ERR, APR_EGENERAL, r->server, "CRCSYNC-DECODE inflate error: %d", z_RC);
511 apr_brigade_cleanup(bb);
512 return APR_EGENERAL;
514 int have = sizeof(decompressed_data_buf) - strm->avail_out;
515 ap_log_error(APLOG_MARK, APLOG_DEBUG, APR_SUCCESS, r->server,
516 "CRCSYNC-DECODE inflate rslt %d, consumed %d, produced %d",
517 z_RC, avail_in_pre_inflate - strm->avail_in, have);
518 if (have)
520 // write output data
521 char * buf = apr_palloc(r->pool, have);
522 memcpy(buf,decompressed_data_buf,have);
523 EVP_DigestUpdate(&ctx->mdctx, buf, have);
524 apr_bucket * b = apr_bucket_pool_create(buf, have, r->pool, f->c->bucket_alloc);
525 APR_BRIGADE_INSERT_TAIL(ctx->bb, b);
527 } while (strm->avail_out == 0);
528 consumed_bytes = len - strm->avail_in;
529 if (z_RC == Z_STREAM_END)
531 ctx->state = DECODING_NEW_SECTION;
532 inflateReset(strm);
534 break;
536 default:
538 ap_log_error(APLOG_MARK, APLOG_ERR, APR_SUCCESS, r->server,
539 "CRCSYNC-DECODE, unknown state %d, terminating transaction",ctx->state);
540 apr_brigade_cleanup(bb);
541 return APR_EGENERAL; // TODO: figure out how to pass the error on to the client
544 APR_BUCKET_REMOVE(e);
548 apr_brigade_cleanup(bb);
549 return APR_SUCCESS;
552 static void *crccache_client_create_config(apr_pool_t *p, server_rec *s) {
553 crccache_client_conf *conf = apr_pcalloc(p, sizeof(crccache_client_conf));
554 /* array of URL prefixes for which caching is enabled */
555 conf->cacheenable = apr_array_make(p, 10, sizeof(struct cache_enable));
556 /* array of URL prefixes for which caching is disabled */
557 conf->cachedisable = apr_array_make(p, 10, sizeof(struct cache_disable));
559 /* XXX: Set default values */
560 conf->dirlevels = DEFAULT_DIRLEVELS;
561 conf->dirlength = DEFAULT_DIRLENGTH;
562 conf->maxfs = DEFAULT_MAX_FILE_SIZE;
563 conf->minfs = DEFAULT_MIN_FILE_SIZE;
565 conf->cache_root = NULL;
566 conf->cache_root_len = 0;
568 return conf;
572 * mod_disk_cache configuration directives handlers.
574 static const char *set_cache_root(cmd_parms *parms, void *in_struct_ptr,
575 const char *arg) {
576 crccache_client_conf *conf = ap_get_module_config(parms->server->module_config,
577 &crccache_client_module);
578 conf->cache_root = arg;
579 conf->cache_root_len = strlen(arg);
580 /* TODO: canonicalize cache_root and strip off any trailing slashes */
582 return NULL;
586 * Consider eliminating the next two directives in favor of
587 * Ian's prime number hash...
588 * key = hash_fn( r->uri)
589 * filename = "/key % prime1 /key %prime2/key %prime3"
591 static const char *set_cache_dirlevels(cmd_parms *parms, void *in_struct_ptr,
592 const char *arg) {
593 crccache_client_conf *conf = ap_get_module_config(parms->server->module_config,
594 &crccache_client_module);
595 int val = atoi(arg);
596 if (val < 1)
597 return "CacheDirLevelsClient value must be an integer greater than 0";
598 if (val * conf->dirlength > CACHEFILE_LEN)
599 return "CacheDirLevelsClient*CacheDirLengthClient value must not be higher than 20";
600 conf->dirlevels = val;
601 return NULL;
603 static const char *set_cache_dirlength(cmd_parms *parms, void *in_struct_ptr,
604 const char *arg) {
605 crccache_client_conf *conf = ap_get_module_config(parms->server->module_config,
606 &crccache_client_module);
607 int val = atoi(arg);
608 if (val < 1)
609 return "CacheDirLengthClient value must be an integer greater than 0";
610 if (val * conf->dirlevels > CACHEFILE_LEN)
611 return "CacheDirLevelsClient*CacheDirLengthClient value must not be higher than 20";
613 conf->dirlength = val;
614 return NULL;
617 static const char *set_cache_minfs(cmd_parms *parms, void *in_struct_ptr,
618 const char *arg) {
619 crccache_client_conf *conf = ap_get_module_config(parms->server->module_config,
620 &crccache_client_module);
622 if (apr_strtoff(&conf->minfs, arg, NULL, 0) != APR_SUCCESS || conf->minfs
623 < 0) {
624 return "CacheMinFileSizeClient argument must be a non-negative integer representing the min size of a file to cache in bytes.";
626 return NULL;
629 static const char *set_cache_maxfs(cmd_parms *parms, void *in_struct_ptr,
630 const char *arg) {
631 crccache_client_conf *conf = ap_get_module_config(parms->server->module_config,
632 &crccache_client_module);
633 if (apr_strtoff(&conf->maxfs, arg, NULL, 0) != APR_SUCCESS || conf->maxfs
634 < 0) {
635 return "CacheMaxFileSizeClient argument must be a non-negative integer representing the max size of a file to cache in bytes.";
637 return NULL;
640 static const char *add_crc_client_enable(cmd_parms *parms, void *dummy,
641 const char *url)
643 crccache_client_conf *conf;
644 struct cache_enable *new;
646 conf =
647 (crccache_client_conf *)ap_get_module_config(parms->server->module_config,
648 &crccache_client_module);
649 new = apr_array_push(conf->cacheenable);
650 if (apr_uri_parse(parms->pool, url, &(new->url))) {
651 return NULL;
653 if (new->url.path) {
654 new->pathlen = strlen(new->url.path);
655 } else {
656 new->pathlen = 1;
657 new->url.path = "/";
659 return NULL;
662 static const command_rec crccache_client_cmds[] =
664 AP_INIT_TAKE1("CRCClientEnable", add_crc_client_enable, NULL, RSRC_CONF, "A cache type and partial URL prefix below which caching is enabled"),
665 AP_INIT_TAKE1("CacheRootClient", set_cache_root, NULL, RSRC_CONF,"The directory to store cache files"),
666 AP_INIT_TAKE1("CacheDirLevelsClient", set_cache_dirlevels, NULL, RSRC_CONF, "The number of levels of subdirectories in the cache"),
667 AP_INIT_TAKE1("CacheDirLengthClient", set_cache_dirlength, NULL, RSRC_CONF, "The number of characters in subdirectory names"),
668 AP_INIT_TAKE1("CacheMinFileSizeClient", set_cache_minfs, NULL, RSRC_CONF, "The minimum file size to cache a document"),
669 AP_INIT_TAKE1("CacheMaxFileSizeClient", set_cache_maxfs, NULL, RSRC_CONF, "The maximum file size to cache a document"),
670 { NULL }
673 int ap_run_insert_filter(request_rec *r);
675 int crccache_client_url_handler(request_rec *r, int lookup)
677 const char *auth;
678 cache_request_rec *cache;
679 crccache_client_conf *conf;
681 /* Delay initialization until we know we are handling a GET */
682 if (r->method_number != M_GET) {
683 return DECLINED;
686 conf = (crccache_client_conf *) ap_get_module_config(r->server->module_config,
687 &crccache_client_module);
689 if (conf->cacheenable->nelts == 0)
690 return DECLINED;
692 /* make space for the per request config */
693 cache = (cache_request_rec *) ap_get_module_config(r->request_config,
694 &crccache_client_module);
695 if (!cache) {
696 cache = apr_pcalloc(r->pool, sizeof(cache_request_rec));
697 ap_set_module_config(r->request_config, &crccache_client_module, cache);
701 * Are we allowed to serve cached info at all?
704 /* find certain cache controlling headers */
705 auth = apr_table_get(r->headers_in, "Authorization");
707 /* First things first - does the request allow us to return
708 * cached information at all? If not, just decline the request.
710 if (auth) {
711 return DECLINED;
715 * Add cache_save filter to cache this request. Choose
716 * the correct filter by checking if we are a subrequest
717 * or not.
719 if (r->main) {
720 ap_log_error(APLOG_MARK, APLOG_DEBUG, APR_SUCCESS,
721 r->server,
722 "Adding CACHE_SAVE_SUBREQ filter for %s",
723 r->uri);
724 ap_add_output_filter_handle(cache_save_subreq_filter_handle,
725 NULL, r, r->connection);
727 else {
728 ap_log_error(APLOG_MARK, APLOG_DEBUG, APR_SUCCESS,
729 r->server, "Adding CACHE_SAVE filter for %s",
730 r->uri);
731 ap_add_output_filter_handle(cache_save_filter_handle,
732 NULL, r, r->connection);
735 cache_handle_t *h;
736 char *key;
738 if (cache_generate_key(r, r->pool, &key) != APR_SUCCESS) {
739 ap_log_error(APLOG_MARK, APLOG_DEBUG, APR_SUCCESS,
740 r->server, "Failed to generate key");
741 return DECLINED;
743 h = apr_palloc(r->pool, sizeof(cache_handle_t));
744 if (open_entity(h, r, key) != OK)
746 ap_log_error(APLOG_MARK, APLOG_DEBUG, APR_SUCCESS,
747 r->server, "Failed to open entity not good");
748 return DECLINED;
750 if (recall_headers(h, r) != APR_SUCCESS) {
751 /* TODO: Handle this error */
752 ap_log_error(APLOG_MARK, APLOG_DEBUG, APR_SUCCESS,
753 r->server, "Failed to recall headers");
754 return DECLINED;
756 cache->handle = h;
757 return DECLINED;
763 * CACHE_SAVE filter
764 * ---------------
766 * Decide whether or not this content should be cached.
767 * If we decide no it should not:
768 * remove the filter from the chain
769 * If we decide yes it should:
770 * Have we already started saving the response?
771 * If we have started, pass the data to the storage manager via store_body
772 * Otherwise:
773 * Check to see if we *can* save this particular response.
774 * If we can, call cache_create_entity() and save the headers and body
775 * Finally, pass the data to the next filter (the network or whatever)
778 int cache_save_filter(ap_filter_t *f, apr_bucket_brigade *in)
780 int rv = !OK;
781 request_rec *r = f->r;
782 cache_request_rec *cache;
783 crccache_client_conf *conf;
784 //const char *cc_out, *cl;
785 const char *cl;
786 const char *exps, /* *lastmods,*/ *dates;//, *etag;
787 apr_time_t exp, date,/* lastmod,*/ now;
788 apr_off_t size;
789 cache_info *info = NULL;
790 char *reason;
791 apr_pool_t *p;
793 conf = (crccache_client_conf *) ap_get_module_config(r->server->module_config,
794 &crccache_client_module);
796 /* Setup cache_request_rec */
797 cache = (cache_request_rec *) ap_get_module_config(r->request_config,
798 &crccache_client_module);
799 if (!cache) {
800 /* user likely configured CACHE_SAVE manually; they should really use
801 * mod_cache configuration to do that
803 cache = apr_pcalloc(r->pool, sizeof(cache_request_rec));
804 ap_set_module_config(r->request_config, &crccache_client_module, cache);
807 reason = NULL;
808 p = r->pool;
810 * Pass Data to Cache
811 * ------------------
812 * This section passes the brigades into the cache modules, but only
813 * if the setup section (see below) is complete.
815 if (cache->block_response) {
816 /* We've already sent down the response and EOS. So, ignore
817 * whatever comes now.
819 return APR_SUCCESS;
822 /* have we already run the cachability check and set up the
823 * cached file handle?
825 if (cache->in_checked) {
826 /* pass the brigades into the cache, then pass them
827 * up the filter stack
829 rv = store_body(cache->handle, r, in);
830 if (rv != APR_SUCCESS) {
831 ap_log_error(APLOG_MARK, APLOG_DEBUG, rv, r->server,
832 "cache: Cache provider's store_body failed!");
833 ap_remove_output_filter(f);
835 return ap_pass_brigade(f->next, in);
839 * Setup Data in Cache
840 * -------------------
841 * This section opens the cache entity and sets various caching
842 * parameters, and decides whether this URL should be cached at
843 * all. This section is* run before the above section.
846 /* read expiry date; if a bad date, then leave it so the client can
847 * read it
849 exps = apr_table_get(r->err_headers_out, "Expires");
850 if (exps == NULL) {
851 exps = apr_table_get(r->headers_out, "Expires");
853 if (exps != NULL) {
854 if (APR_DATE_BAD == (exp = apr_date_parse_http(exps))) {
855 exps = NULL;
858 else {
859 exp = APR_DATE_BAD;
863 * what responses should we not cache?
865 * At this point we decide based on the response headers whether it
866 * is appropriate _NOT_ to cache the data from the server. There are
867 * a whole lot of conditions that prevent us from caching this data.
868 * They are tested here one by one to be clear and unambiguous.
870 if (r->status != HTTP_OK && r->status != HTTP_NON_AUTHORITATIVE
871 && r->status != HTTP_MULTIPLE_CHOICES
872 && r->status != HTTP_MOVED_PERMANENTLY
873 && r->status != HTTP_NOT_MODIFIED) {
874 /* RFC2616 13.4 we are allowed to cache 200, 203, 206, 300, 301 or 410
875 * We don't cache 206, because we don't (yet) cache partial responses.
876 * We include 304 Not Modified here too as this is the origin server
877 * telling us to serve the cached copy.
881 if (reason) {
882 /* noop */
885 else if (r->status == HTTP_NOT_MODIFIED &&
886 !cache->handle && !cache->stale_handle) {
887 /* if the server said 304 Not Modified but we have no cache
888 * file - pass this untouched to the user agent, it's not for us.
890 reason = "HTTP Status 304 Not Modified";
893 else if (r->header_only && !cache->stale_handle) {
894 /* Forbid HEAD requests unless we have it cached already */
895 reason = "HTTP HEAD request";
897 if (reason) {
898 ap_log_error(APLOG_MARK, APLOG_DEBUG, 0, r->server,
899 "cache: %s not cached. Reason: %s", r->unparsed_uri,
900 reason);
902 /* remove this filter from the chain */
903 ap_remove_output_filter(f);
905 /* ship the data up the stack */
906 return ap_pass_brigade(f->next, in);
909 /* Make it so that we don't execute this path again. */
910 cache->in_checked = 1;
912 /* Set the content length if known.
914 cl = apr_table_get(r->err_headers_out, "Content-Length");
915 if (cl == NULL) {
916 cl = apr_table_get(r->headers_out, "Content-Length");
918 if (cl) {
919 char *errp;
920 if (apr_strtoff(&size, cl, &errp, 10) || *errp || size < 0) {
921 cl = NULL; /* parse error, see next 'if' block */
925 if (!cl) {
926 /* if we don't get the content-length, see if we have all the
927 * buckets and use their length to calculate the size
929 apr_bucket *e;
930 int all_buckets_here=0;
931 int unresolved_length = 0;
932 size=0;
933 for (e = APR_BRIGADE_FIRST(in);
934 e != APR_BRIGADE_SENTINEL(in);
935 e = APR_BUCKET_NEXT(e))
937 if (APR_BUCKET_IS_EOS(e)) {
938 all_buckets_here=1;
939 break;
941 if (APR_BUCKET_IS_FLUSH(e)) {
942 unresolved_length = 1;
943 continue;
945 if (e->length == (apr_size_t)-1) {
946 break;
948 size += e->length;
950 if (!all_buckets_here) {
951 size = -1;
955 /* It's safe to cache the response.
957 * There are two possiblities at this point:
958 * - cache->handle == NULL. In this case there is no previously
959 * cached entity anywhere on the system. We must create a brand
960 * new entity and store the response in it.
961 * - cache->stale_handle != NULL. In this case there is a stale
962 * entity in the system which needs to be replaced by new
963 * content (unless the result was 304 Not Modified, which means
964 * the cached entity is actually fresh, and we should update
965 * the headers).
968 /* Did we have a stale cache entry that really is stale?
970 * Note that for HEAD requests, we won't get the body, so for a stale
971 * HEAD request, we don't remove the entity - instead we let the
972 * CACHE_REMOVE_URL filter remove the stale item from the cache.
974 if (cache->stale_handle) {
975 if (r->status == HTTP_NOT_MODIFIED) {
976 /* Oh, hey. It isn't that stale! Yay! */
977 cache->handle = cache->stale_handle;
978 info = &cache->handle->cache_obj->info;
979 rv = OK;
981 else if (!r->header_only) {
982 /* Oh, well. Toss it. */
983 remove_entity(cache->stale_handle);
984 /* Treat the request as if it wasn't conditional. */
985 cache->stale_handle = NULL;
987 * Restore the original request headers as they may be needed
988 * by further output filters like the byterange filter to make
989 * the correct decisions.
991 r->headers_in = cache->stale_headers;
995 /* no cache handle, create a new entity only for non-HEAD requests */
996 if (!cache->handle && !r->header_only) {
997 char *key;
998 cache_handle_t *h = apr_pcalloc(r->pool, sizeof(cache_handle_t));
999 rv = cache_generate_key(r, r->pool, &key);
1000 if (rv != APR_SUCCESS) {
1001 return rv;
1003 rv = create_entity(h, r, key, size);
1004 if (rv != APR_SUCCESS) {
1005 return rv;
1007 cache->handle = h;
1008 info = apr_pcalloc(r->pool, sizeof(cache_info));
1009 /* We only set info->status upon the initial creation. */
1010 info->status = r->status;
1013 if (rv != OK) {
1014 /* Caching layer declined the opportunity to cache the response */
1015 ap_remove_output_filter(f);
1016 return ap_pass_brigade(f->next, in);
1019 ap_log_error(APLOG_MARK, APLOG_DEBUG, 0, r->server,
1020 "cache: Caching url: %s", r->unparsed_uri);
1022 /* We are actually caching this response. So it does not
1023 * make sense to remove this entity any more.
1025 ap_log_error(APLOG_MARK, APLOG_DEBUG, 0, r->server,
1026 "cache: Removing CACHE_REMOVE_URL filter.");
1027 //ap_remove_output_filter(cache->remove_url_filter);
1030 * We now want to update the cache file header information with
1031 * the new date, last modified, expire and content length and write
1032 * it away to our cache file. First, we determine these values from
1033 * the response, using heuristics if appropriate.
1035 * In addition, we make HTTP/1.1 age calculations and write them away
1036 * too.
1039 /* Read the date. Generate one if one is not supplied */
1040 dates = apr_table_get(r->err_headers_out, "Date");
1041 if (dates == NULL) {
1042 dates = apr_table_get(r->headers_out, "Date");
1044 if (dates != NULL) {
1045 info->date = apr_date_parse_http(dates);
1047 else {
1048 info->date = APR_DATE_BAD;
1051 now = apr_time_now();
1052 if (info->date == APR_DATE_BAD) { /* No, or bad date */
1053 /* no date header (or bad header)! */
1054 info->date = now;
1056 date = info->date;
1058 /* set response_time for HTTP/1.1 age calculations */
1059 info->response_time = now;
1061 /* get the request time */
1062 info->request_time = r->request_time;
1064 info->expire = exp;
1066 /* We found a stale entry which wasn't really stale. */
1067 if (cache->stale_handle) {
1068 /* Load in the saved status and clear the status line. */
1069 r->status = info->status;
1070 r->status_line = NULL;
1072 /* RFC 2616 10.3.5 states that entity headers are not supposed
1073 * to be in the 304 response. Therefore, we need to combine the
1074 * response headers with the cached headers *before* we update
1075 * the cached headers.
1077 * However, before doing that, we need to first merge in
1078 * err_headers_out and we also need to strip any hop-by-hop
1079 * headers that might have snuck in.
1081 r->headers_out = ap_cache_cacheable_headers_out(r);
1083 /* Merge in our cached headers. However, keep any updated values. */
1084 ap_cache_accept_headers(cache->handle, r, 1);
1087 /* Write away header information to cache. It is possible that we are
1088 * trying to update headers for an entity which has already been cached.
1090 * This may fail, due to an unwritable cache area. E.g. filesystem full,
1091 * permissions problems or a read-only (re)mount. This must be handled
1092 * later.
1094 rv = store_headers(cache->handle, r, info);
1096 if(rv != APR_SUCCESS) {
1097 ap_log_error(APLOG_MARK, APLOG_DEBUG, rv, r->server,
1098 "cache: store_headers failed");
1099 ap_remove_output_filter(f);
1101 return ap_pass_brigade(f->next, in);
1104 rv = store_body(cache->handle, r, in);
1105 if (rv != APR_SUCCESS) {
1106 ap_log_error(APLOG_MARK, APLOG_DEBUG, rv, r->server,
1107 "cache: store_body failed");
1108 ap_remove_output_filter(f);
1111 return ap_pass_brigade(f->next, in);
1114 static void crccache_client_register_hook(apr_pool_t *p) {
1115 ap_log_error(APLOG_MARK, APLOG_INFO, 0, NULL,
1116 "Registering crccache client module, (C) 2009, Toby Collett");
1118 /* cache initializer */
1119 ap_hook_post_config(crccache_client_post_config, NULL, NULL, APR_HOOK_REALLY_FIRST);
1120 /* cache handler */
1121 ap_hook_quick_handler(crccache_client_url_handler, NULL, NULL, APR_HOOK_FIRST);
1122 /* cache filters
1123 * XXX The cache filters need to run right after the handlers and before
1124 * any other filters. Consider creating AP_FTYPE_CACHE for this purpose.
1126 * Depending on the type of request (subrequest / main request) they
1127 * need to be run before AP_FTYPE_CONTENT_SET / after AP_FTYPE_CONTENT_SET
1128 * filters. Thus create two filter handles for each type:
1129 * cache_save_filter_handle / cache_out_filter_handle to be used by
1130 * main requests and
1131 * cache_save_subreq_filter_handle / cache_out_subreq_filter_handle
1132 * to be run by subrequest
1135 * CACHE_SAVE must go into the filter chain after a possible DEFLATE
1136 * filter to ensure that the compressed content is stored.
1137 * Incrementing filter type by 1 ensures his happens.
1138 * TODO: Revise this logic. In order for the crccache to work properly,
1139 * the plain text content must be cached and not the deflated content
1140 * Even more so, when receiving compressed content from the upstream
1141 * server, the cache_save_filter handler should uncompress it before
1142 * storing in the cache (but provide the compressed data to the client)
1144 cache_save_filter_handle =
1145 ap_register_output_filter("CACHE_SAVE",
1146 cache_save_filter,
1147 NULL,
1148 AP_FTYPE_CONTENT_SET+1);
1150 * CACHE_SAVE_SUBREQ must go into the filter chain before SUBREQ_CORE to
1151 * handle subrequsts. Decrementing filter type by 1 ensures this
1152 * happens.
1154 cache_save_subreq_filter_handle =
1155 ap_register_output_filter("CACHE_SAVE_SUBREQ",
1156 cache_save_filter,
1157 NULL,
1158 AP_FTYPE_CONTENT_SET-1);
1160 * CRCCACHE_DECODE must go into the filter chain after a possible DEFLATE
1161 * filter to ensure that already compressed cache objects do not
1162 * get compressed again. Incrementing filter type by 1 ensures
1163 * his happens.
1165 crccache_decode_filter_handle = ap_register_output_filter(
1166 "CRCCACHE_DECODE", crccache_decode_filter, NULL,
1167 AP_FTYPE_CONTENT_SET + 1);
1172 module AP_MODULE_DECLARE_DATA crccache_client_module = {
1173 STANDARD20_MODULE_STUFF, NULL, /* create per-directory config structure */
1174 NULL , /* merge per-directory config structures */
1175 crccache_client_create_config, /* create per-server config structure */
1176 NULL , /* merge per-server config structures */
1177 crccache_client_cmds, /* command apr_table_t */
1178 crccache_client_register_hook /* register hooks */